Missingness plots

First code bashing

# packages
library(ggplot2)
library(here)
## here() starts at C:/Users/Jonathan/Desktop/unimelb/PhD/missNetsSimulations
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tibble)

# true estimates
resultsFiles = list.files(path = here("Output", "20230726_missNetsTrueModels"))

# initialise list
trueCoefList = list()
trueSeList = list()

# index for how missingness is saved
missSaveLabel = c("Peter", "Todd")
missSaveValue = c("NA", "0")

# TODO:: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 1

# TODO:: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 1

# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]

# loop to load model parameters
for(netInd in 1:6){
  
  # load in results for each network
  load(here("Output", "20230726_missNetsTrueModels", resultsFiles[[netInd]]))
  
  # and take the coefficients only
  trueCoefList[[netInd]] = coef(modelres)
  trueSeList[[netInd]] = summary(modelres)$coefficients[,2]
}
## Warning: This object was fit with 'ergm' version 3.11.0 or earlier. Summarizing it with version 3.12 or later may return incorrect results or fail.
## This object was fit with 'ergm' version 3.11.0 or earlier. Summarizing it with version 3.12 or later may return incorrect results or fail.
## This object was fit with 'ergm' version 3.11.0 or earlier. Summarizing it with version 3.12 or later may return incorrect results or fail.
## This object was fit with 'ergm' version 3.11.0 or earlier. Summarizing it with version 3.12 or later may return incorrect results or fail.
## This object was fit with 'ergm' version 3.11.0 or earlier. Summarizing it with version 3.12 or later may return incorrect results or fail.
## Warning: This object was fit with 'ergm' version 4.2.2 or earlier. Summarizing
## it with version 4.3 or later may return incorrect results or fail.
# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", chosenMissLabel))

# set up some lists to put in the ergm re-estimations
modelResList = list()
modelSeList = list()
missModelList = list()
propMissList = list()

# hmm... what's the best way to handle this....
# I have specific indices,
# but there are different amounts of each file because re-estimations tend to fail
# especially for some combinations

# some regular expressions
# net1Ind = grep("net1", outputList)
# missMod2Ind = grep("missModel2", outputList)
# 
# # check which indices overlap
# net1MissMod2 = missMod2Ind[missMod2Ind %in% net1Ind]

# set a specific index for the network
chosenNetInd = 2

# file list for the chosen network index
chosenNetOutFiles = grep(paste("20230814_missNetReest_net", chosenNetInd, sep =""), outputList)

##### For the MNAR files (coefSet1), the Miss Model is always 2... so what I can do is:
## regex the MNAR files 
chosenMnarOutFiles = grep(paste("20231011_missErgmNetReest_Miss", chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)

# combine the two indices
combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)

# check to make sure there aren't any duplicates
if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
  stop("Duplicate re-estimation datafiles! BAD.")
}

# loop to load
chosenNetOutList = outputList[combinedNetOutList]

for(fileInd in 1:length(chosenNetOutList)){
  # load all files
  load(here("Output", "20230825_simMissReest", "Peter", chosenNetOutList[fileInd]))
  
  # suppress these version difference warnings. 
  suppressWarnings({
  modelResList[[fileInd]] = modelres$coefficients
  modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
  
  })
  
  
  # slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
  
  # this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
  if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
   missModelList[[fileInd]] = 4
  } else {
   missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
  }

  # propMiss is fine (or should be.)
  
  # regex to get the first number after the specified text
  propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
  
}
# plotting
# note: I think I should have a plot for each missingness proportion
chosenProp = 1

# there should only be 3 miss props, 1 for 10%, 3.5 for 35%, and 6 for 60%
# subset the lists to only be a single chosen proportion
chosenPropIndex = propMissList == chosenProp

modelResChosenPropList = modelResList[chosenPropIndex]
modelSeChosenPropList = modelSeList[chosenPropIndex]
missModelChosenPropList = missModelList[chosenPropIndex]

## AltStar
# making the dataset
altStarPlotData = data.frame(altStar = c(unlist(lapply(modelResChosenPropList, `[[`, 2) )), 
                             SE = c(unlist(lapply(modelSeChosenPropList, `[[`, 2))),
                             missModel = factor(c(unlist(missModelChosenPropList)), levels = c(1,2,3,4), labels = c("Indep", "mcarERGM", "Latent", "mnarErgm")))

# order the plot data
altStarPlotOrd = altStarPlotData[order(altStarPlotData[,"altStar"]),]

# one more ordering with the missing model type
altStarPlotOrd = altStarPlotOrd[order(altStarPlotOrd[,"missModel"]),]



# getting the true altStar value
trueAltStar = trueCoefList[[chosenNetInd]][2]
trueAltStarSE = trueSeList[[chosenNetInd]][2]

# caterpillar?
altStarPlot = ggplot( data = altStarPlotOrd,
                    aes( x = 1:nrow(altStarPlotOrd), y = altStar, col = missModel)) + 
             geom_errorbar(aes (ymin = (altStar - 1.96*SE), ymax = (altStar + 1.96*SE), width = 0.4)) + 
             xlab("") + 
             ylab("AltStar estimate (95% Confidence Int)") + 
             geom_hline(yintercept = trueAltStar, col = "darkblue") + 
             geom_hline(yintercept = 0, col = "black", lty = 2) +
             geom_point() + 
             ggtitle(paste("Diagnostics - Net", chosenNetInd, " - Prop", chosenProp, sep = "")) + 
             theme_classic() + 
             theme(axis.ticks.x = element_blank(),
                   axis.text.x = element_blank()) +
         #          legend.position = "none") +
             geom_rect(aes(xmin = -Inf,
                           xmax = Inf, 
                           ymin = (trueAltStar - 1.96*trueAltStarSE),
                           ymax = (trueAltStar + 1.96*trueAltStarSE)),
                           alpha = 0.01,
                           colour = NA,
                           fill = "grey") +
             ylim(min(altStarPlotData$altStar) - 2*max(abs(altStarPlotData$SE)), max(altStarPlotData$altStar) + 2*max(abs(altStarPlotData$SE)))

altStarPlot

## Gwesp
# making the dataset
gwespPlotData = data.frame(gwesp = c(unlist(lapply(modelResChosenPropList, `[[`, 3) )), 
                             SE = c(unlist(lapply(modelSeChosenPropList, `[[`, 3))),
                             missModel = factor(c(unlist(missModelChosenPropList)), levels = c(1,2,3,4), labels = c("Indep", "mcarERGM", "Latent", "mnarErgm")))

# order the plot data
gwespPlotOrd = gwespPlotData[order(gwespPlotData[,"gwesp"]),]

# one more ordering with the missing model type
gwespPlotOrd = gwespPlotOrd[order(gwespPlotOrd[,"missModel"]),]


# getting the true gwdeg value
truegwesp = trueCoefList[[chosenNetInd]][3]
truegwespSE = trueSeList[[chosenNetInd]][3]

# caterpillar?
gwespPlot = ggplot( data = gwespPlotOrd,
                    aes( x = 1:nrow(gwespPlotOrd), y = gwesp, col = missModel)) + 
             geom_errorbar(aes (ymin = (gwesp - 1.96*SE), ymax = (gwesp + 1.96*SE), width = 0.4)) + 
             xlab("") + 
             ylab("GWESP estimate (95% Confidence Int)") + 
             labs(col = "missType") + 
             geom_hline(yintercept = truegwesp, col = "darkblue") + 
             geom_hline(yintercept = 0, col = "black", lty = 2) +
             geom_point() + 
             ggtitle(paste("Diagnostics - Net", chosenNetInd, " - Prop", chosenProp, sep = "")) + 
             theme_classic() + 
             theme(axis.ticks.x = element_blank(),
                   axis.text.x = element_blank()) +
           #        legend.position = "none") +
             geom_rect(aes(xmin = -Inf,
                           xmax = Inf, 
                           ymin = (truegwesp - 1.96*truegwespSE),
                           ymax = (truegwesp + 1.96*truegwespSE)),
                           alpha = 0.01,
                           colour = NA,
                           fill = "grey") +
              ylim(min(gwespPlotOrd$gwesp) - 2*max(abs(gwespPlotOrd$SE)), max(gwespPlotOrd$gwesp) + 2*max(abs(gwespPlotOrd$SE)))
    

gwespPlot
## Warning: Removed 1 rows containing missing values (`geom_hline()`).

# code to try computing a fail rate dataset

# fixed value for the amount of estimation attempts
estAttempts = 50

# choose a proportion
chosenProp = 1

# annoyingly how the missingness is saved is missing in this set of results so...
# there should only be one way the missingness is saved, keep it as a string
chosenMiss = chosenMissValue

# subset the lists to only be a single chosen proportion
# get the index
chosenPropIndex = propMissList == chosenProp

# and subset
propMissChosenPropList = propMissList[chosenPropIndex]
missModelChosenPropList = missModelList[chosenPropIndex]

# these lists will always be used so...
# there should only be one missing proportion
# note that there can sometimes be more than one missingness proportion in the list...

# turn the list of missingness proportions into a single numeric value
failMissProp = as.numeric(names(table(unlist(propMissChosenPropList))))

# check to make sure there's only one proportion
if(length(failMissProp) != 1){
  stop("More than one missingness proportion after subsetting")
}

# calculate the success rate
successRate = as.numeric(table(unlist(missModelChosenPropList))/estAttempts)

# and which miss model it is, these can technically range from 1 to 4
failMissModel = as.numeric(names(table(unlist(missModelChosenPropList))))
  
# and put it all together
failData = data.frame(failRate = (1 - successRate), 
                      netInd = rep(chosenNetInd, length(failMissModel)),
                      missModel = failMissModel, 
                      missProp = rep(failMissProp, length(failMissModel)),
                      missSave = rep(chosenMiss, length(failMissModel)))

Turn them into functions

I want these functions to take the entire file list, choose specific networks and missingness proportions. The thing is, I might need a few helper functions beforehand to get the right data….

Multiple smaller functions doing very specific things are preferred over one giant function that does a lot of stuff all at once

### Function to turn the data from the extracted lists into plot-friendly format

# start from the list of coefficients and standard errors
# these can have varying rows per model because not all re-estimations converge


prepMissPlot = function(modelResList, modelSeList, missModelList, propMissList, chosenProp, chosenPara){

  ## prepMissPlot(modelResList, modelSeList, missModelList, propMissList, chosenProp, chosenPara) takes 
  ## four different containing various estimated values and extracted indices to be used 
  ## in making a plot-friendly dataset.
  ##
  ## Input:
  ## - modelResList: A k-long list containing the estimated model parameters for k re-estimated models.
  ##
  ## - modelSeList: A k-long list containing the estimated standard errors for k re-estimated models.
  ##                
  ## - missModelList: A k-long list containing the chosen missingness model index for k re-estimated models.
  ##                  
  ## - propMissList: A k-long list containing the missingness proportion index for k re-estimated models.
  ##
  ## - chosenProp: The specified proportion of missingness to plot. Three possible values here:
  ##               1 = 10%, 3 = 35%, 6 = 60%.
  ##
  ## - chosenPara: The specified parameter to plot. Three possible values here:
  ##               "edges" for edges, "altStar" for alternating stars, "gwesp" for gwesp.
  ##
  ## Output: 
  ## - A k x 3 dataframe containing the parameter values, standard error values, and missing model index.
  ##   The dataframe is ordered by parameter value (smallest to largest) for each missingness model.


  ## Check to make sure all the lists are of the same length
  listLengths = lapply(list(modelResList, modelSeList, missModelList, propMissList), FUN = length)
  
  ## A unit test that spits out an error if there's more than one unique length in the list of lengths
  if( length(unique(listLengths)) != 1 ){
    stop("Lists have differing lengths")
  }
  
  # subset the lists to only be a single chosen proportion
  chosenPropIndex = propMissList == chosenProp
  
  modelResChosenPropList = modelResList[chosenPropIndex]
  modelSeChosenPropList = modelSeList[chosenPropIndex]
  missModelChosenPropList = missModelList[chosenPropIndex]
  
  # Specify indices for the chosen parameter
  # doing it this way because parameter names get very lengthy
  paraIndex = c(1:3)
  modelParaNames = c("edges", "altStar", "gwesp")
  chosenParaIndex = paraIndex[modelParaNames == chosenPara]
  
  
  # making the dataset
  plotData = data.frame(parameter = c(unlist(lapply(modelResChosenPropList, `[[`, chosenParaIndex) )), 
                        SE = c(unlist(lapply(modelSeChosenPropList, `[[`, chosenParaIndex))),
                        missModel = factor(c(unlist(missModelChosenPropList)), levels = c(1,2,3,4), labels = c("Indep", "mcarERGM", "Latent", "mnarErgm")))
  
  # change to allow 4 = mnarErgm and change missModel 2 = mcarERGM
  
  # order the plot data
  plotOrd = plotData[order(plotData[,"parameter"]),]
  
  # one more ordering with the missing model type
  plotOrd = plotOrd[order(plotOrd[,"missModel"]),]
  
  # return the ordered plot data
  return(plotOrd)
}

## actual plotting function

missCaterpillarPlot = function(plotData, truePara, trueSe, chosenPara, chosenNetInd, chosenProp, chosenMiss){
  
  ## missCaterpillarPlot(plotData, truePara, trueSe, chosenPara, chosenNetInd, chosenProp) is a very specialised
  ## plotting function that produces a caterpillar plot to compare parameter estimates for very specific data.
  ##
  ## Input:
  ## - plotData: Needs to be a k x 3 data frame for k re-estimated models containing parameter values,
  ##             standard error values, and a missing model index. Ordering is optional, but visually
  ##             useful. Ideally from prepMissPlot().
  ##
  ## - truePara: A single float (can technically be integer, but unlikely). Represents the true model estimate.
  ##                
  ## - trueSe: A single float (can technically be integer, but unlikely). Represents the true standard error.
  ##
  ## - chosenPara: A string with three (but really two) options. The chosen string is the parameter that is 
  ##               plotted. The options, in order, are "edges", "altStar", and "gwesp"
  ##
  ## - chosenNetInd: An integer between 1 to 6 for current purposes. Each integer represents one of the six
  ##                 datasets from UCINet chosen for the current round of re-estimations.
  ##                
  ## - chosenProp: A single integer or float with three options representing the proportion of missingness.
  ##               Options are 1 = 10%, 3.5 = 35%, 6 = 60%.
  ##
  ## - chosenMiss: A value to indicate what missingness is saved as (e.g., NA is Peter, 0 is Todd).
  ##               Input should be strings that are directly used for the label.
  ##                  
  ## Output: 
  ## - The output should be a caterpillar plot containing with k datapoints for k re-estimated models.
  ##   Colours are labelled and represent the missingness model used. True values are represented as a 
  ##   grey rectangle. All error bars or other depictions of spread are 95% confidence intervals. Plot
  ##   title should also be adaptive to the parameters in the function.
  
  # requires ggplot2 to work
  require(ggplot2)


  # caterpillar?
  caterPlot = ggplot( data = plotData,
                      aes( x = 1:nrow(plotData), y = parameter, col = missModel)) + 
               geom_errorbar(aes (ymin = (parameter - 1.96*SE), ymax = (parameter + 1.96*SE), width = 0.4)) + 
               xlab("") + 
               ylab(paste(chosenPara,"estimate (95% Confidence Int)", sep = " ")) + 
               labs(col = "missType") + 
               geom_hline(yintercept = truePara, col = "darkblue") + 
               geom_hline(yintercept = 0, col = "black", lty = 2) +
               geom_point() + 
               ggtitle(paste("Diagnostics - Net", chosenNetInd, " - Prop", chosenProp, " - Miss", chosenMiss ,sep = "")) + 
               theme_classic() + 
               theme(axis.ticks.x = element_blank(),
                     axis.text.x = element_blank()) +
               geom_rect(aes(xmin = -Inf,
                             xmax = Inf, 
                             ymin = (truePara - 1.96*trueSe),
                             ymax = (truePara + 1.96*trueSe)),
                             alpha = 0.01,
                             colour = NA,
                             fill = "grey") +
                ylim(min(min(plotData$parameter) - 2*max(abs(plotData$SE)), (truePara - 1.96*trueSe)), 
                     max(max(plotData$parameter) + 2*max(abs(plotData$SE)), (truePara + 1.96*trueSe)))
      
  # print out the plot
  caterPlot
  
    
}


## function for calculating failure rate (for each datafile read in)
# a function for this because I overwrite which networks I read in for the caterpillar plots
# this function captures every combination of which missingness model, which chosen missingness, which missingness proportion, and which network.

prepFailPlot = function(propMissList, missModelList, chosenNetInd, chosenProp, chosenMiss, estAttempts = 50){
  
  ## prepFailPlot(propMissList, missModelList, chosenNetInd, chosenProp, chosenMiss, estAttempts = 50) calculates
  ## the failure rates of re-estimations of degraded networks with specific object type requirements.
  ##
  ## Input:
  ## - missModelList: A k-long list containing the chosen missingness model index for k re-estimated models.
  ##                  Can technically range between 1 to 4.
  ##                  
  ## - propMissList: A k-long list containing the missingness proportion index for k re-estimated models.
  ##
  ## - chosenNetInd: An integer between 1 to 6 for current purposes. Each integer represents one of the six
  ##                 datasets from UCINet chosen for the current round of re-estimations.
  ##                
  ## - chosenProp: A single integer or float with three options representing the proportion of missingness.
  ##               Options are 1 = 10%, 3 = 35% (... yes, it's annoying.), 6 = 60%.
  ##
  ## - chosenMiss: A string to indicate what missingness is saved as (e.g., "NA" is Peter, "0" is Todd).
  ##                                
  ## Output: 
  ## - The output should be a data frame containing the failure rate for the specified combination of network, 
  ##   proportion of missingness, how the missingness is saved, and all the missingness models in the input list.
    
  
  # subset the lists to only be a single chosen proportion
  # get the index
  chosenPropIndex = propMissList == chosenProp
  
  # and subset
  propMissChosenPropList = propMissList[chosenPropIndex]
  missModelChosenPropList = missModelList[chosenPropIndex]

  # turn the list of missingness proportions into a single numeric value
  failMissProp = as.numeric(names(table(unlist(propMissChosenPropList))))
  
  # check to make sure there's only one proportion
  if(length(failMissProp) != 1){
    stop("More than one missingness proportion after subsetting")
  }
  
  # calculate the success rate
  successRate = as.numeric(table(unlist(missModelChosenPropList))/estAttempts)
  
  # and which miss model it is, these can technically range from 1 to 4
  failMissModel = as.numeric(names(table(unlist(missModelChosenPropList))))
    
  # and put it all together
  failData = data.frame(failRate = (1 - successRate), 
                        netInd = rep(chosenNetInd, length(failMissModel)),
                        missModel = failMissModel, 
                        missProp = rep(failMissProp, length(failMissModel)),
                        missSave = rep(chosenMiss, length(failMissModel)))
  
  
  # return the data frame
  return(failData)

}

## no need for a function to plot fail rates for each combination since I would only want one plot

Function test run

# choose a network
chosenNetInd = 2

# generate relative bias and relative variance (...standard error?) data structures
relBiasData = data.frame(rBias = c(), netInd = c(), missSave = c(), propMiss = c(), missModel = c(), paraName = c())
relSeData = data.frame(rSe = c(),  netInd = c(), missSave = c(), propMiss = c(), missModel = c(), paraName = c())

# select a parameter
modelParaOptions = c("edges", "altStar", "gwesp")

# this can be a loop!
for( chosenPara in 1:length(modelParaOptions)){
  
  # grab the chosen parameter name
  chosenParaName = modelParaOptions[chosenPara]
  
  # still need to take out true parameter values
  tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
  tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
  
  # including edges
  # test run of the plot function
  testPlotData = prepMissPlot(modelResList = modelResList,
                              modelSeList = modelSeList,
                              missModelList = missModelList,
                              propMissList = propMissList,
                              chosenProp = chunkMissProp,
                              chosenPara = chosenParaName)
  
  # brief inspection just to make sure nothing's wrong
  head(testPlotData) %>% 
    print()
  
  
  # and plot it
  missCaterpillarPlot(plotData = testPlotData,
                      truePara = tempTruePara,
                      trueSe = tempTrueSe,
                      chosenPara = chosenParaName,
                      chosenNetInd = chosenNetInd,
                      chosenProp = chunkMissProp,
                      chosenMiss = chosenMissValue) %>% 
    print()

  # make a temp rbias/rSe data structure to bind with the data frame
  tempRelBias = data.frame(rBias = (testPlotData$parameter - tempTruePara)/tempTruePara,
                           netInd = rep(chosenNetInd, nrow(testPlotData)),
                           missSave = rep(chosenMissLabel, nrow(testPlotData)),
                           propMiss = rep(chunkMissProp, nrow(testPlotData)),
                           missModel = testPlotData$missModel,
                           paraName = rep(chosenParaName, nrow(testPlotData)))
  tempRelSe = data.frame(rSe = (testPlotData$SE/tempTrueSe),
                          netInd = rep(chosenNetInd, nrow(testPlotData)),
                          missSave = rep(chosenMissLabel, nrow(testPlotData)),
                          propMiss = rep(chunkMissProp, nrow(testPlotData)),
                          missModel = testPlotData$missModel,
                          paraName = rep(chosenParaName, nrow(testPlotData)))

  # bind them
  relBiasData = rbind(relBiasData, tempRelBias)
  relSeData = rbind(relSeData, tempRelSe)
  
}
##    parameter       SE missModel
## 8  -3.572767 1.424147     Indep
## 11 -1.985151 2.002649     Indep
## 16 -1.838034 2.499750     Indep
## 4  -1.770333 2.842933     Indep
## 5  -1.659360 2.455153     Indep
## 17 -1.632414 2.516041     Indep

##    parameter        SE missModel
## 7  -2.667740 1.1581335     Indep
## 6  -2.651687 1.1342895     Indep
## 10 -2.519181 0.8830086     Indep
## 13 -2.484591 0.9431191     Indep
## 14 -2.391916 0.9567384     Indep
## 18 -2.339140 0.8401319     Indep

##    parameter        SE missModel
## 15  3.713921 0.4693278     Indep
## 3   3.805494 0.5092882     Indep
## 18  3.847856 0.5059986     Indep
## 12  3.867887 0.5433778     Indep
## 17  3.937885 0.5014629     Indep
## 11  3.945033 0.5001198     Indep
## Warning: Removed 1 rows containing missing values (`geom_hline()`).

# test run for the fail rate data
# a data frame to initialise
failRateData = data.frame()

# a temporary data frame from the output to be binded to the big data frame
tempFailData = prepFailPlot(propMissList = propMissList,
                            missModelList = missModelList,
                            chosenNetInd = chosenNetInd,
                            chosenProp = chunkMissProp,
                            chosenMiss = chosenMissValue)

# and bind it
failRateData = rbind(failRateData, tempFailData)

Rest of 10% Peter

# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 1

# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 1

# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]


# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = c(4, 5, 6)

# TODO: vector of file names for regex
chunkRdataNames = c("20230814_missNetReest_net",
                    "20230814_missNetReest_net",
                    "20231010_missNetReest_net")

chunkMnarNames = c("20231011_missErgmNetReest_Miss",
                   "20231011_missErgmNetReest_Miss",
                   "20231011_missErgmNetReest_Miss")

# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")

# loop for all chosen networks
for(chunkInd in 1:length(chunkNetworks)){
  
  # make sure the re-estimation lists are empty in the beginning
  modelResList = list()
  modelSeList = list()
  missModelList = list()
  propMissList = list()
  
  # set a specific index for the network
  chosenNetInd = chunkNetworks[chunkInd]
  
  # file list for the chosen network index
  chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
  
  ## regex the MNAR files 
  chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
  
  # combine the two indices
  combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
  
  # check to make sure there aren't any duplicates
  if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
    stop("Duplicate re-estimation datafiles! BAD.")
  }
  
  # loop to load
  chosenNetOutList = outputList[combinedNetOutList]
  
  for(fileInd in 1:length(chosenNetOutList)){
    # load all files
    load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
    
    # suppress these version difference warnings. 
    suppressWarnings({
    modelResList[[fileInd]] = modelres$coefficients
    modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
    
    })
    
    # slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
    
    # this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
    if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
     missModelList[[fileInd]] = 4
    } else {
     missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
    }
      
    # propmiss should be the same
    propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
  
  }
  
  
  # this can be a loop!
  for( chosenPara in 1:length(modelParaOptions)){
    
    # grab the chosen parameter name
    chosenParaName = modelParaOptions[chosenPara]
    
    # still need to take out true parameter values
    tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
    tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
    
    # including edges
    # test run of the plot function
    testPlotData = prepMissPlot(modelResList = modelResList,
                                modelSeList = modelSeList,
                                missModelList = missModelList,
                                propMissList = propMissList,
                                chosenProp = chunkMissProp,
                                chosenPara = chosenParaName)
    
    # brief inspection just to make sure nothing's wrong
    head(testPlotData) %>% 
      print()
    
    
    # and plot it
    missCaterpillarPlot(plotData = testPlotData,
                        truePara = tempTruePara,
                        trueSe = tempTrueSe,
                        chosenPara = chosenParaName,
                        chosenNetInd = chosenNetInd,
                        chosenProp = chunkMissProp,
                        chosenMiss = chosenMissValue) %>% 
      print()
  
  # make a temp rbias/rSe data structure to bind with the data frame
  tempRelBias = data.frame(rBias = (testPlotData$parameter - tempTruePara)/tempTruePara,
                           netInd = rep(chosenNetInd, nrow(testPlotData)),
                           missSave = rep(chosenMissLabel, nrow(testPlotData)),
                           propMiss = rep(chunkMissProp, nrow(testPlotData)),
                           missModel = testPlotData$missModel,
                           paraName = rep(chosenParaName, nrow(testPlotData)))
  tempRelSe = data.frame(rSe = (testPlotData$SE/tempTrueSe),
                          netInd = rep(chosenNetInd, nrow(testPlotData)),
                          missSave = rep(chosenMissLabel, nrow(testPlotData)),
                          propMiss = rep(chunkMissProp, nrow(testPlotData)),
                          missModel = testPlotData$missModel,
                          paraName = rep(chosenParaName, nrow(testPlotData)))

  # bind them
  relBiasData = rbind(relBiasData, tempRelBias)
  relSeData = rbind(relSeData, tempRelSe)
  
  }
    

  
  # a temporary data frame from the output to be binded to the big data frame
  tempFailData = prepFailPlot(propMissList = propMissList,
                              missModelList = missModelList,
                              chosenNetInd = chosenNetInd,
                              chosenProp = chunkMissProp,
                              chosenMiss = chosenMissValue)

  # and bind it
  failRateData = rbind(failRateData, tempFailData)

}
##     parameter       SE missModel
## 27 -2.0450471 1.355888     Indep
## 17 -1.6887316 1.372465     Indep
## 46 -1.0782637 1.986377     Indep
## 14 -1.0079846 1.794369     Indep
## 30 -0.9901474 1.690993     Indep
## 23 -0.9677308 1.709223     Indep

##    parameter        SE missModel
## 29 -2.380226 0.8948844     Indep
## 34 -2.332890 0.8567828     Indep
## 39 -2.225284 0.6481276     Indep
## 12 -2.203632 0.7067738     Indep
## 25 -2.197335 0.7339564     Indep
## 19 -2.194530 0.6131555     Indep

##    parameter        SE missModel
## 42  2.795684 0.4150812     Indep
## 35  2.841932 0.3840148     Indep
## 5   2.906557 0.3822707     Indep
## 33  2.937962 0.3854581     Indep
## 47  2.939097 0.4309207     Indep
## 11  2.950955 0.3905500     Indep
## Warning: Removed 1 rows containing missing values (`geom_hline()`).

##    parameter        SE missModel
## 13 -5.050360        NA     Indep
## 9  -4.101907 0.4138049     Indep
## 12 -4.095339 0.3877220     Indep
## 5  -4.089140 0.3838097     Indep
## 2  -4.052289 0.4108890     Indep
## 14 -4.042779 0.4425365     Indep

##     parameter        SE missModel
## 14 -0.4271309 0.2344825     Indep
## 1  -0.3689833 0.2283937     Indep
## 9  -0.3686057 0.2258536     Indep
## 2  -0.3471691 0.2368848     Indep
## 17 -0.3043420 0.2638864     Indep
## 16 -0.2999977 0.2291898     Indep

##    parameter        SE missModel
## 13 0.5529485        NA     Indep
## 15 1.3536200 0.3463476     Indep
## 4  1.4155391 0.3970444     Indep
## 10 1.5321071 0.3904505     Indep
## 3  1.5354860 0.3464884     Indep
## 11 1.5899620 0.3939409     Indep

##    parameter       SE missModel
## 33 -6.661065 1.161756     Indep
## 43 -6.520892 1.108532     Indep
## 9  -6.430122 1.040719     Indep
## 42 -6.423533 1.099057     Indep
## 40 -6.348662 1.106593     Indep
## 10 -6.331207 1.156858     Indep
## Warning: Removed 1 rows containing missing values (`geom_hline()`).

##     parameter        SE missModel
## 28 -0.9007415 0.2363367     Indep
## 45 -0.7825025 0.2400551     Indep
## 44 -0.7622703 0.2476933     Indep
## 18 -0.7536412 0.2325391     Indep
## 3  -0.7459583 0.2495798     Indep
## 16 -0.7435994 0.2477373     Indep

##    parameter        SE missModel
## 10  1.053478 0.1945004     Indep
## 29  1.204949 0.1979167     Indep
## 19  1.224828 0.2129165     Indep
## 42  1.239121 0.2127094     Indep
## 33  1.246916 0.2216857     Indep
## 9   1.261516 0.2118016     Indep
## Warning: Removed 1 rows containing missing values (`geom_hline()`).

More missingness conditions

35% Peter

# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", "Peter"))

# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 1

# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 3
chunkMissPropFloat = 3.5 # specifically for 35% missingness because I regex by taking the first number next to 'prop'.

# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]


# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = c(2, 4, 5, 6)

# TODO: vector of file names for regex
chunkRdataNames = c("20231004_missNetReest_net",
                    "20231004_missNetReest_net",
                    "20231004_missNetReest_net",
                    "20231010_missNetReest_net")

chunkMnarNames = c("20231011_missErgmNetReest_Miss",
                   "20231011_missErgmNetReest_Miss",
                   "20231011_missErgmNetReest_Miss",
                   "20231011_missErgmNetReest_Miss")

# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")

# loop for all specified networks 
for(chunkInd in 1:length(chunkNetworks)){
  
  # make sure the re-estimation lists are empty in the beginning
  modelResList = list()
  modelSeList = list()
  missModelList = list()
  propMissList = list()
  
  # set a specific index for the network
  chosenNetInd = chunkNetworks[chunkInd]
  
  # file list for the chosen network index
  chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
  
  ## regex the MNAR files 
  chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
  
  # combine the two indices
  combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
  
  # check to make sure there aren't any duplicates
  if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
    stop("Duplicate re-estimation datafiles! BAD.")
  }
  
  # loop to load
  chosenNetOutList = outputList[combinedNetOutList]
  
  for(fileInd in 1:length(chosenNetOutList)){
    # load all files
    load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
    
    # suppress these version difference warnings. 
    suppressWarnings({
    modelResList[[fileInd]] = modelres$coefficients
    modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
    
    })
    
    # slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
    
    # this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
    if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
     missModelList[[fileInd]] = 4
    } else {
     missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
    }
      
    # propmiss should be the same
    propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
  }
  
    # this can be a loop!
  for( chosenPara in 1:length(modelParaOptions)){
    
    # grab the chosen parameter name
    chosenParaName = modelParaOptions[chosenPara]
    
    # still need to take out true parameter values
    tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
    tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
    
    # including edges
    # test run of the plot function
    testPlotData = prepMissPlot(modelResList = modelResList,
                                modelSeList = modelSeList,
                                missModelList = missModelList,
                                propMissList = propMissList,
                                chosenProp = chunkMissProp,
                                chosenPara = chosenParaName)
    
    # brief inspection just to make sure nothing's wrong
    head(testPlotData) %>% 
      print()
    
    
    # and plot it
    missCaterpillarPlot(plotData = testPlotData,
                        truePara = tempTruePara,
                        trueSe = tempTrueSe,
                        chosenPara = chosenParaName,
                        chosenNetInd = chosenNetInd,
                        chosenProp = chunkMissPropFloat,
                        chosenMiss = chosenMissValue) %>% 
      print()
    
  # make a temp rbias/rSe data structure to bind with the data frame
  tempRelBias = data.frame(rBias = (testPlotData$parameter - tempTruePara)/tempTruePara,
                           netInd = rep(chosenNetInd, nrow(testPlotData)),
                           missSave = rep(chosenMissLabel, nrow(testPlotData)),
                           propMiss = rep(chunkMissProp, nrow(testPlotData)),
                           missModel = testPlotData$missModel,
                           paraName = rep(chosenParaName, nrow(testPlotData)))
  tempRelSe = data.frame(rSe = (testPlotData$SE/tempTrueSe),
                          netInd = rep(chosenNetInd, nrow(testPlotData)),
                          missSave = rep(chosenMissLabel, nrow(testPlotData)),
                          propMiss = rep(chunkMissProp, nrow(testPlotData)),
                          missModel = testPlotData$missModel,
                          paraName = rep(chosenParaName, nrow(testPlotData)))

  # bind them
  relBiasData = rbind(relBiasData, tempRelBias)
  relSeData = rbind(relSeData, tempRelSe)
  
  }
  

  
  # a temporary data frame from the output to be binded to the big data frame
  tempFailData = prepFailPlot(propMissList = propMissList,
                              missModelList = missModelList,
                              chosenNetInd = chosenNetInd,
                              chosenProp = chunkMissProp,
                              chosenMiss = chosenMissValue)
  
  # and bind it
  failRateData = rbind(failRateData, tempFailData)

}
##     parameter       SE missModel
## 7  -3.3781085 1.571063     Indep
## 6  -2.8192660 1.761062     Indep
## 5  -1.2349580 3.762309     Indep
## 10 -1.0923401 4.398689     Indep
## 2  -0.9781865 3.853383     Indep
## 8  -0.5903796 2.911597     Indep

##    parameter       SE missModel
## 12 -4.558152 4.043885     Indep
## 14 -3.474548 2.506201     Indep
## 13 -3.227319 2.943347     Indep
## 1  -3.187661 2.242548     Indep
## 16 -2.702853 1.253112     Indep
## 15 -2.632289 1.020986     Indep

##    parameter        SE missModel
## 4   3.323302 0.5682986     Indep
## 1   3.437627 0.6409324     Indep
## 14  3.484401 0.6621262     Indep
## 8   3.566714 0.6259340     Indep
## 3   3.696938 0.6449210     Indep
## 11  3.741881 0.6132945     Indep

##    parameter       SE missModel
## 16 -2.540926 1.468209     Indep
## 33 -2.529681 1.691315     Indep
## 35 -2.427015 1.466072     Indep
## 29 -2.020486 1.593190     Indep
## 26 -1.943041 1.608631     Indep
## 1  -1.765409 1.619538     Indep

##    parameter       SE missModel
## 12 -3.536047 1.825336     Indep
## 23 -3.329675 2.333945     Indep
## 18 -2.818693 1.409608     Indep
## 37 -2.728979 1.230028     Indep
## 34 -2.644825 1.234967     Indep
## 10 -2.596926 1.057250     Indep

##    parameter        SE missModel
## 3   2.497359 0.4892193     Indep
## 7   2.510058 0.4429165     Indep
## 30  2.617554 0.5050321     Indep
## 2   2.640901 0.5463155     Indep
## 27  2.712829 0.4962674     Indep
## 13  2.731805 0.4496286     Indep

##    parameter        SE missModel
## 6  -4.763305 0.5927591     Indep
## 12 -4.423675 0.5544808     Indep
## 1  -4.256021 0.5488157     Indep
## 15 -4.180141 0.5269770     Indep
## 2  -4.162467 0.5045574     Indep
## 5  -4.153906 0.5770239     Indep

##     parameter        SE missModel
## 6  -0.6946950 0.2194906     Indep
## 5  -0.6897158 0.2631167     Indep
## 15 -0.6294828 0.2489500     Indep
## 12 -0.5802322 0.2540797     Indep
## 1  -0.5784914 0.2739039     Indep
## 10 -0.5771192 0.2790234     Indep

##    parameter        SE missModel
## 4   1.137951 0.4996599     Indep
## 8   1.178245 0.4959795     Indep
## 14  1.329230 0.3960599     Indep
## 7   1.429522 0.4758393     Indep
## 13  1.560362 0.4546985     Indep
## 17  1.770557 0.5469616     Indep

##    parameter       SE missModel
## 9  -6.980429 1.139007     Indep
## 11 -6.766356 1.286395     Indep
## 45 -6.555106 1.242611     Indep
## 48 -6.540552 1.100341     Indep
## 25 -6.434793 1.285338     Indep
## 41 -6.358277 1.639629     Indep

##    parameter        SE missModel
## 14 -1.277323 0.3909004     Indep
## 23 -1.185686 0.2943182     Indep
## 32 -1.167117 0.2680503     Indep
## 31 -1.159366 0.3823150     Indep
## 16 -1.136381 0.3262691     Indep
## 7  -1.063438 0.2624494     Indep

##    parameter        SE missModel
## 41 0.8504767 0.2874547     Indep
## 36 0.9067248 0.2625241     Indep
## 50 1.1343697 0.3153175     Indep
## 18 1.2385265 0.2613634     Indep
## 22 1.2590632 0.3627478     Indep
## 27 1.2599829 0.3063573     Indep

60% Peter

# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", "Peter"))

# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 1

# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 6

# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]


# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = c(2, 4, 5, 6)

# TODO: vector of file names for regex
chunkRdataNames = c("20231005_missNetReest_net",
                    "20231005_missNetReest_net",
                    "20231011_missNetReest_net",
                    "20231011_missNetReest_net")

chunkMnarNames = c("20231012_missErgmNetReest_Miss",
                   "20231012_missErgmNetReest_Miss",
                   "20231012_missErgmNetReest_Miss",
                   "20231012_missErgmNetReest_Miss")

# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")

# loop for all specified networks 
for(chunkInd in 1:length(chunkNetworks)){
  
  # make sure the re-estimation lists are empty in the beginning
  modelResList = list()
  modelSeList = list()
  missModelList = list()
  propMissList = list()
  
  # set a specific index for the network
  chosenNetInd = chunkNetworks[chunkInd]
  
  # file list for the chosen network index
  chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
  
  ## regex the MNAR files 
  chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
  
  # combine the two indices
  combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
  
  # check to make sure there aren't any duplicates
  if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
    stop("Duplicate re-estimation datafiles! BAD.")
  }
  
  # loop to load
  chosenNetOutList = outputList[combinedNetOutList]
  
  for(fileInd in 1:length(chosenNetOutList)){
    # load all files
    load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
    
    # suppress these version difference warnings. 
    suppressWarnings({
    modelResList[[fileInd]] = modelres$coefficients
    modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
    
    })
    
    # slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
    
    # this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
    if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
     missModelList[[fileInd]] = 4
    } else {
     missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
    }
      
    # propmiss should be the same
    propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
  }
  
  # this can be a loop!
  for( chosenPara in 1:length(modelParaOptions)){
    
    # grab the chosen parameter name
    chosenParaName = modelParaOptions[chosenPara]
    
    # still need to take out true parameter values
    tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
    tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
    
    # including edges
    # test run of the plot function
    testPlotData = prepMissPlot(modelResList = modelResList,
                                modelSeList = modelSeList,
                                missModelList = missModelList,
                                propMissList = propMissList,
                                chosenProp = chunkMissProp,
                                chosenPara = chosenParaName)
    
    # brief inspection just to make sure nothing's wrong
    head(testPlotData) %>% 
      print()
    
    
    # and plot it
    missCaterpillarPlot(plotData = testPlotData,
                        truePara = tempTruePara,
                        trueSe = tempTrueSe,
                        chosenPara = chosenParaName,
                        chosenNetInd = chosenNetInd,
                        chosenProp = chunkMissProp,
                        chosenMiss = chosenMissValue) %>% 
      print()
  
  # make a temp rbias/rSe data structure to bind with the data frame
  tempRelBias = data.frame(rBias = (testPlotData$parameter - tempTruePara)/tempTruePara,
                           netInd = rep(chosenNetInd, nrow(testPlotData)),
                           missSave = rep(chosenMissLabel, nrow(testPlotData)),
                           propMiss = rep(chunkMissProp, nrow(testPlotData)),
                           missModel = testPlotData$missModel,
                           paraName = rep(chosenParaName, nrow(testPlotData)))
  tempRelSe = data.frame(rSe = (testPlotData$SE/tempTrueSe),
                          netInd = rep(chosenNetInd, nrow(testPlotData)),
                          missSave = rep(chosenMissLabel, nrow(testPlotData)),
                          propMiss = rep(chunkMissProp, nrow(testPlotData)),
                          missModel = testPlotData$missModel,
                          paraName = rep(chosenParaName, nrow(testPlotData)))

  # bind them
  relBiasData = rbind(relBiasData, tempRelBias)
  relSeData = rbind(relSeData, tempRelSe)
  }
  
  
  
  # a temporary data frame from the output to be binded to the big data frame
  tempFailData = prepFailPlot(propMissList = propMissList,
                              missModelList = missModelList,
                              chosenNetInd = chosenNetInd,
                              chosenProp = chunkMissProp,
                              chosenMiss = chosenMissValue)
  
  # and bind it
  failRateData = rbind(failRateData, tempFailData)

}
##    parameter       SE missModel
## 5  -5.789352 2.062231     Indep
## 20 -4.598751 1.850594     Indep
## 8  -3.903931 2.399643     Indep
## 12 -3.574117 2.122361     Indep
## 9  -2.868342 1.920461     Indep
## 21 -1.942612 1.960238     Indep

##     parameter        SE missModel
## 7  -12.189565 14.285280     Indep
## 10  -8.636441  7.301452     Indep
## 1   -7.742465  8.269611     Indep
## 3   -7.435589  7.489878     Indep
## 14  -5.732763  5.862388     Indep
## 2   -3.727859  4.052053     Indep

##    parameter        SE missModel
## 4   2.565689 0.6949242     Indep
## 21  2.737034 0.7987535     Indep
## 6   2.812536 0.7580205     Indep
## 16  3.077702 0.9303100     Indep
## 18  3.192135 0.7561982     Indep
## 19  3.204291 0.7683349     Indep

##    parameter       SE missModel
## 6  -9.066671       NA     Indep
## 35 -3.983056 1.482096     Indep
## 25 -3.688016 1.425409     Indep
## 10 -2.518948 1.491961     Indep
## 24 -2.343804 2.379993     Indep
## 28 -2.265829 2.084995     Indep

##     parameter       SE missModel
## 13 -10.600876 9.199102     Indep
## 12  -7.608177 5.976598     Indep
## 4   -6.967102 4.550087     Indep
## 8   -5.107308 3.390178     Indep
## 32  -3.863878 3.145042     Indep
## 16  -3.732614 2.720044     Indep

##    parameter        SE missModel
## 6   1.157657        NA     Indep
## 11  1.498300 0.8333988     Indep
## 26  1.608398 0.7615361     Indep
## 17  2.343988 0.8712003     Indep
## 1   2.363417 0.6172139     Indep
## 14  2.388523 0.5638172     Indep

##    parameter        SE missModel
## 18 -7.915567        NA     Indep
## 10 -6.006189        NA     Indep
## 4  -5.514273 1.1385876     Indep
## 15 -5.061362 0.9744425     Indep
## 13 -4.919149 1.1736797     Indep
## 8  -4.631797 0.8727901     Indep

##     parameter        SE missModel
## 4  -1.0541759 0.1900847     Indep
## 13 -0.8025529 0.2567004     Indep
## 19 -0.7912228 0.2738069     Indep
## 15 -0.7703994 0.2574297     Indep
## 2  -0.7660896 0.3085943     Indep
## 11 -0.7373803 0.3518431     Indep

##    parameter        SE missModel
## 10 0.7233999        NA     Indep
## 6  0.8481073 0.6061540     Indep
## 1  1.6349917 0.7578088     Indep
## 12 1.8921170 0.7662248     Indep
## 20 1.9075172 0.8669149     Indep
## 11 1.9556487 0.6164461     Indep

##    parameter       SE missModel
## 4  -8.649119 2.080996     Indep
## 26 -8.272475 2.215487     Indep
## 11 -7.824375 1.694336     Indep
## 47 -7.136581 2.073979     Indep
## 32 -6.928534 1.651045     Indep
## 33 -6.845862 1.545135     Indep

##    parameter        SE missModel
## 44 -2.062308 0.9548628     Indep
## 41 -1.750173 1.2046102     Indep
## 10 -1.622288 0.7202489     Indep
## 14 -1.518258 0.6462700     Indep
## 28 -1.463635 0.9698189     Indep
## 1  -1.456279 0.4954156     Indep

##    parameter        SE missModel
## 47 0.6587452 0.4476198     Indep
## 26 0.6732766 0.4701941     Indep
## 11 0.8695871 0.5348307     Indep
## 4  0.9417435 0.4040563     Indep
## 9  1.0394754 0.4892135     Indep
## 20 1.0694637 0.5600053     Indep

10% Todd

# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", "Todd"))

# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 2

# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 1

# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]


# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = c(1, 2, 4, 5, 6)

# TODO: vector of file names for regex
chunkRdataNames = c("20231005_missNetReest_net",
                    "20231005_missNetReest_net",
                    "20231005_missNetReest_net",
                    "20231005_missNetReest_net",
                    "20231012_missNetReest_net")

chunkMnarNames = c("20231012_missErgmNetReest_Miss",
                   "20231012_missErgmNetReest_Miss",
                   "20231012_missErgmNetReest_Miss",
                   "20231012_missErgmNetReest_Miss",
                   "20231012_missErgmNetReest_Miss")

# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")

# loop for all specified networks 
for(chunkInd in 1:length(chunkNetworks)){
  
  # make sure the re-estimation lists are empty in the beginning
  modelResList = list()
  modelSeList = list()
  missModelList = list()
  propMissList = list()
  
  # set a specific index for the network
  chosenNetInd = chunkNetworks[chunkInd]
  
  # file list for the chosen network index
  chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
  
  ## regex the MNAR files 
  chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
  
  # combine the two indices
  combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
  
  # check to make sure there aren't any duplicates
  if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
    stop("Duplicate re-estimation datafiles! BAD.")
  }
  
  # loop to load
  chosenNetOutList = outputList[combinedNetOutList]
  
  for(fileInd in 1:length(chosenNetOutList)){
    # load all files
    load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
    
    # suppress these version difference warnings. 
    suppressWarnings({
    modelResList[[fileInd]] = modelres$coefficients
    modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
    
    })
    
    # slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
    
    # this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
    if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
     missModelList[[fileInd]] = 4
    } else {
     missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
    }
      
    # propmiss should be the same
    propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
  }
  
  # this can be a loop!
  for( chosenPara in 1:length(modelParaOptions)){
    
    # grab the chosen parameter name
    chosenParaName = modelParaOptions[chosenPara]
    
    # still need to take out true parameter values
    tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
    tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
    
    # including edges
    # test run of the plot function
    testPlotData = prepMissPlot(modelResList = modelResList,
                                modelSeList = modelSeList,
                                missModelList = missModelList,
                                propMissList = propMissList,
                                chosenProp = chunkMissProp,
                                chosenPara = chosenParaName)
    
    # brief inspection just to make sure nothing's wrong
    head(testPlotData) %>% 
      print()
    
    
    # and plot it
    missCaterpillarPlot(plotData = testPlotData,
                        truePara = tempTruePara,
                        trueSe = tempTrueSe,
                        chosenPara = chosenParaName,
                        chosenNetInd = chosenNetInd,
                        chosenProp = chunkMissProp,
                        chosenMiss = chosenMissValue) %>% 
      print()
    
  # make a temp rbias/rSe data structure to bind with the data frame
  tempRelBias = data.frame(rBias = (testPlotData$parameter - tempTruePara)/tempTruePara,
                           netInd = rep(chosenNetInd, nrow(testPlotData)),
                           missSave = rep(chosenMissLabel, nrow(testPlotData)),
                           propMiss = rep(chunkMissProp, nrow(testPlotData)),
                           missModel = testPlotData$missModel,
                           paraName = rep(chosenParaName, nrow(testPlotData)))
  tempRelSe = data.frame(rSe = (testPlotData$SE/tempTrueSe),
                          netInd = rep(chosenNetInd, nrow(testPlotData)),
                          missSave = rep(chosenMissLabel, nrow(testPlotData)),
                          propMiss = rep(chunkMissProp, nrow(testPlotData)),
                          missModel = testPlotData$missModel,
                          paraName = rep(chosenParaName, nrow(testPlotData)))

  # bind them
  relBiasData = rbind(relBiasData, tempRelBias)
  relSeData = rbind(relSeData, tempRelSe)  
  }
  

  
  
  # a temporary data frame from the output to be binded to the big data frame
  tempFailData = prepFailPlot(propMissList = propMissList,
                              missModelList = missModelList,
                              chosenNetInd = chosenNetInd,
                              chosenProp = chunkMissProp,
                              chosenMiss = chosenMissValue)
  
  # and bind it
  failRateData = rbind(failRateData, tempFailData)

}
##    parameter       SE missModel
## 8  -2.601463 1.256855     Indep
## 24 -2.413046 1.108934     Indep
## 10 -2.395926 1.374799     Indep
## 15 -2.383965 1.253623     Indep
## 9  -2.176266 1.247728     Indep
## 38 -2.170672 1.111399     Indep

##    parameter        SE missModel
## 5  -2.209327 0.5979698     Indep
## 23 -2.189741 0.6877216     Indep
## 30 -2.066400 0.5400037     Indep
## 4  -2.026562 0.4440535     Indep
## 13 -1.986430 0.4411822     Indep
## 39 -1.977470 0.4192232     Indep

##    parameter        SE missModel
## 2   2.409912 0.2867801     Indep
## 25  2.416764 0.2709456     Indep
## 3   2.762928 0.3026043     Indep
## 31  2.874716 0.3174043     Indep
## 26  2.911591 0.3013970     Indep
## 6   2.946111 0.3113748     Indep
## Warning: Removed 1 rows containing missing values (`geom_hline()`).

##     parameter       SE missModel
## 2  -1.6490952 1.703432     Indep
## 9  -1.4773923 1.747493     Indep
## 23 -1.3377862 1.848695     Indep
## 19 -0.9747808 1.645787     Indep
## 12 -0.8558571 2.243823     Indep
## 5  -0.7947030 2.103760     Indep

##    parameter        SE missModel
## 21 -2.568440 1.1667600     Indep
## 14 -2.378623 0.8922353     Indep
## 36 -2.341166 0.9318989     Indep
## 8  -2.336268 1.1878642     Indep
## 46 -2.330456 0.8169489     Indep
## 48 -2.304476 1.1932852     Indep

##    parameter        SE missModel
## 5   1.549235 0.3025049     Indep
## 29  1.710355 0.3314923     Indep
## 24  1.878453 0.3544091     Indep
## 44  1.900765 0.3554659     Indep
## 18  2.099150 0.3389286     Indep
## 39  2.122251 0.3851450     Indep

##     parameter       SE missModel
## 29 -1.6332365 1.153097     Indep
## 48 -1.3210907 1.395869     Indep
## 22 -0.9199276 1.420857     Indep
## 18 -0.7142808 1.432538     Indep
## 12 -0.6703803 1.449203     Indep
## 10 -0.6670701 1.636150     Indep

##    parameter        SE missModel
## 34 -2.003352 0.6859301     Indep
## 26 -1.947103 0.6863618     Indep
## 44 -1.940004 0.6037775     Indep
## 38 -1.911503 0.5101568     Indep
## 30 -1.903417 0.4739233     Indep
## 6  -1.901365 0.7485541     Indep

##    parameter        SE missModel
## 48  1.591746 0.3083361     Indep
## 4   1.612123 0.2829673     Indep
## 18  1.694798 0.2803756     Indep
## 39  1.788530 0.2972528     Indep
## 26  1.793877 0.2787256     Indep
## 6   1.799936 0.2751293     Indep
## Warning: Removed 1 rows containing missing values (`geom_hline()`).

##    parameter        SE missModel
## 6  -4.264797 0.3877630     Indep
## 4  -4.192364 0.3555877     Indep
## 11 -4.155292 0.3629730     Indep
## 15 -4.110912 0.3622067     Indep
## 3  -4.093634 0.3679711     Indep
## 14 -4.090357 0.3716855     Indep
## Warning: Removed 1 rows containing missing values (`geom_hline()`).

##     parameter        SE missModel
## 1  -0.1933919 0.2208256     Indep
## 11 -0.1928590 0.2280006     Indep
## 7  -0.1823017 0.2226638     Indep
## 14 -0.1312494 0.2360561     Indep
## 15 -0.1227517 0.2318603     Indep
## 13 -0.1205444 0.2556446     Indep

##    parameter        SE missModel
## 6   0.895099 0.2581558     Indep
## 9   1.169641 0.3098932     Indep
## 8   1.205350 0.3021720     Indep
## 5   1.214879 0.3145655     Indep
## 10  1.268217 0.3100610     Indep
## 4   1.294713 0.3410631     Indep

##    parameter       SE missModel
## 9  -7.562327 1.097650     Indep
## 5  -7.320652 1.176731     Indep
## 20 -7.263994 1.127838     Indep
## 33 -7.255869 1.161143     Indep
## 2  -7.248503 1.104335     Indep
## 50 -7.185518 1.135164     Indep
## Warning: Removed 1 rows containing missing values (`geom_hline()`).

##     parameter        SE missModel
## 49 -0.5511893 0.2380867     Indep
## 28 -0.5461336 0.2205140     Indep
## 18 -0.5001765 0.2197595     Indep
## 6  -0.4852246 0.2300400     Indep
## 43 -0.4411331 0.2405094     Indep
## 41 -0.4401505 0.2326350     Indep

##    parameter        SE missModel
## 10 0.7147952 0.1443590     Indep
## 20 0.7862908 0.1544625     Indep
## 5  0.8066656 0.1520125     Indep
## 9  0.8667842 0.1670487     Indep
## 40 0.8687591 0.1599717     Indep
## 11 0.8730449 0.1547718     Indep

35% Todd

# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", "Todd"))

# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 2

# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 3
chunkMissPropFloat = 3.5 # specifically for 35% missingness because I regex by taking the first number next to 'prop'.

# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]


# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = c(1, 2, 4, 5, 6)

# TODO: vector of file names for regex
chunkRdataNames = c("20231005_missNetReest_net",
                    "20231005_missNetReest_net",
                    "20231005_missNetReest_net",
                    "20231012_missNetReest_net",
                    "20231012_missNetReest_net")

chunkMnarNames = c("20231012_missErgmNetReest_Miss",
                   "20231012_missErgmNetReest_Miss",
                   "20231012_missErgmNetReest_Miss",
                   "20231012_missErgmNetReest_Miss",
                   "20231012_missErgmNetReest_Miss")

# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")

# loop for all specified networks 
for(chunkInd in 1:length(chunkNetworks)){
  
  # make sure the re-estimation lists are empty in the beginning
  modelResList = list()
  modelSeList = list()
  missModelList = list()
  propMissList = list()
  
  # set a specific index for the network
  chosenNetInd = chunkNetworks[chunkInd]
  
  # file list for the chosen network index
  chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
  
  ## regex the MNAR files 
  chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
  
  # combine the two indices
  combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
  
  # check to make sure there aren't any duplicates
  if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
    stop("Duplicate re-estimation datafiles! BAD.")
  }
  
  # loop to load
  chosenNetOutList = outputList[combinedNetOutList]
  
  for(fileInd in 1:length(chosenNetOutList)){
    # load all files
    load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
    
    # suppress these version difference warnings. 
    suppressWarnings({
    modelResList[[fileInd]] = modelres$coefficients
    modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
    
    })
    
    # slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
    
    # this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
    if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
     missModelList[[fileInd]] = 4
    } else {
     missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
    }
      
    # propmiss should be the same
    propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
  }
  
  # this can be a loop!
  for( chosenPara in 1:length(modelParaOptions)){
    
    # grab the chosen parameter name
    chosenParaName = modelParaOptions[chosenPara]
    
    # still need to take out true parameter values
    tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
    tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
    
    # including edges
    # test run of the plot function
    testPlotData = prepMissPlot(modelResList = modelResList,
                                modelSeList = modelSeList,
                                missModelList = missModelList,
                                propMissList = propMissList,
                                chosenProp = chunkMissProp,
                                chosenPara = chosenParaName)
    
    # brief inspection just to make sure nothing's wrong
    head(testPlotData) %>% 
      print()
    
    
    # and plot it
    missCaterpillarPlot(plotData = testPlotData,
                        truePara = tempTruePara,
                        trueSe = tempTrueSe,
                        chosenPara = chosenParaName,
                        chosenNetInd = chosenNetInd,
                        chosenProp = chunkMissPropFloat,
                        chosenMiss = chosenMissValue) %>% 
      print()

  # make a temp rbias/rSe data structure to bind with the data frame
  tempRelBias = data.frame(rBias = (testPlotData$parameter - tempTruePara)/tempTruePara,
                           netInd = rep(chosenNetInd, nrow(testPlotData)),
                           missSave = rep(chosenMissLabel, nrow(testPlotData)),
                           propMiss = rep(chunkMissProp, nrow(testPlotData)),
                           missModel = testPlotData$missModel,
                           paraName = rep(chosenParaName, nrow(testPlotData)))
  tempRelSe = data.frame(rSe = (testPlotData$SE/tempTrueSe),
                          netInd = rep(chosenNetInd, nrow(testPlotData)),
                          missSave = rep(chosenMissLabel, nrow(testPlotData)),
                          propMiss = rep(chunkMissProp, nrow(testPlotData)),
                          missModel = testPlotData$missModel,
                          paraName = rep(chosenParaName, nrow(testPlotData)))

  # bind them
  relBiasData = rbind(relBiasData, tempRelBias)
  relSeData = rbind(relSeData, tempRelSe)
  }
  
  

  
  # a temporary data frame from the output to be binded to the big data frame
  tempFailData = prepFailPlot(propMissList = propMissList,
                              missModelList = missModelList,
                              chosenNetInd = chosenNetInd,
                              chosenProp = chunkMissProp,
                              chosenMiss = chosenMissValue)
  
  # and bind it
  failRateData = rbind(failRateData, tempFailData)

}
##    parameter        SE missModel
## 13 -3.337059 0.8859680     Indep
## 2  -2.860776 0.7802048     Indep
## 18 -2.851610 1.0974701     Indep
## 37 -2.726147 0.8829394     Indep
## 9  -2.508242 0.9298530     Indep
## 3  -2.472002 0.8998251     Indep

##    parameter        SE missModel
## 15 -1.436287 0.4408012     Indep
## 44 -1.234459 0.5375122     Indep
## 4  -1.200908 0.4771346     Indep
## 38 -1.151209 0.4432072     Indep
## 12 -1.119609 0.3651799     Indep
## 33 -1.107068 0.5208508     Indep

##    parameter        SE missModel
## 13 0.8143432 0.1433313     Indep
## 22 0.9230283 0.1460986     Indep
## 11 0.9431998 0.1522376     Indep
## 1  0.9683054 0.1539687     Indep
## 30 0.9840596 0.1521053     Indep
## 39 1.0125468 0.1555323     Indep
## Warning: Removed 1 rows containing missing values (`geom_hline()`).

##    parameter       SE missModel
## 18 -3.903673 1.139670     Indep
## 21 -2.816481 1.616045     Indep
## 19 -2.459853 1.432304     Indep
## 47 -2.184766 1.735768     Indep
## 4  -1.885313 1.392126     Indep
## 6  -1.847751 1.171954     Indep

##    parameter        SE missModel
## 50 -2.431769 0.9117054     Indep
## 41 -1.988648 0.9647086     Indep
## 38 -1.952683 0.7673793     Indep
## 25 -1.862815 1.0076413     Indep
## 34 -1.652086 0.6791568     Indep
## 27 -1.521875 0.7766990     Indep

##    parameter        SE missModel
## 18 0.1781799 0.1574842     Indep
## 21 0.3824131 0.1777333     Indep
## 19 0.4221073 0.1832613     Indep
## 9  0.4592685 0.1906219     Indep
## 47 0.4712505 0.1916285     Indep
## 5  0.5133984 0.1805051     Indep

##    parameter        SE missModel
## 6  -2.659930 0.8906202     Indep
## 43 -2.484435 0.9949309     Indep
## 42 -2.379924 1.0514690     Indep
## 2  -2.249203 1.0359835     Indep
## 19 -2.083134 1.2432392     Indep
## 35 -1.883939 1.1410617     Indep

##    parameter        SE missModel
## 29 -1.382605 0.6180631     Indep
## 8  -1.350535 0.4879766     Indep
## 24 -1.324427 0.4870290     Indep
## 13 -1.320548 0.5947146     Indep
## 20 -1.271720 0.5316526     Indep
## 11 -1.204599 0.5222904     Indep

##    parameter        SE missModel
## 1  0.4634877 0.1972899     Indep
## 38 0.5640863 0.1936564     Indep
## 19 0.5753617 0.2009085     Indep
## 31 0.5896167 0.2008107     Indep
## 33 0.6195691 0.1980860     Indep
## 50 0.6667230 0.2139163     Indep

##   parameter        SE missModel
## 2 -4.417349 0.4255926     Indep
## 1 -4.366253 0.3996069     Indep
## 3 -4.016838 0.3891978     Indep
## 6 -4.546791 0.4310100  mcarERGM
## 5 -4.453206 0.4022960  mcarERGM
## 7 -4.263979 0.3547582  mcarERGM
## Warning: Removed 1 rows containing missing values (`geom_hline()`).

##     parameter        SE missModel
## 3 -0.02212587 0.2536399     Indep
## 1  0.30771625 0.2589552     Indep
## 2  0.39936734 0.2535769     Indep
## 4 -0.14067356 0.2359803  mcarERGM
## 7 -0.04922777 0.2510923  mcarERGM
## 5  0.43782339 0.2521844  mcarERGM

##   parameter        SE missModel
## 2 0.6856637 0.2854993     Indep
## 1 0.8319856 0.3128379     Indep
## 3 1.2445683 0.3393180     Indep
## 6 0.4876890 0.2788660  mcarERGM
## 5 0.6333639 0.2932883  mcarERGM
## 7 1.4323621 0.3900154  mcarERGM

##    parameter       SE missModel
## 9  -9.680129 1.321978     Indep
## 24 -9.409439 1.341914     Indep
## 19 -9.138619 1.273927     Indep
## 22 -8.715590 1.274077     Indep
## 5  -8.703109 1.269927     Indep
## 14 -8.598590 1.232700     Indep

##     parameter        SE missModel
## 34 -0.4271915 0.2338981     Indep
## 35 -0.2108234 0.2405158     Indep
## 29 -0.1825741 0.2283651     Indep
## 39 -0.1809863 0.2414358     Indep
## 12 -0.1535835 0.2152647     Indep
## 49 -0.0967697 0.2282145     Indep

##    parameter        SE missModel
## 24 0.2549825 0.1382805     Indep
## 43 0.2645356 0.1409349     Indep
## 3  0.3558289 0.1495326     Indep
## 10 0.3681067 0.1433609     Indep
## 41 0.3699661 0.1499506     Indep
## 19 0.3730467 0.1402606     Indep

60% Todd

# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", "Todd"))

# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 2

# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 6

# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]


# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = c(1, 2, 4, 5, 6)

# TODO: vector of file names for regex
chunkRdataNames = c("20231006_missNetReest_net",
                    "20231006_missNetReest_net",
                    "20231006_missNetReest_net",
                    "20231011_missNetReest_net",
                    "20231011_missNetReest_net")

chunkMnarNames = c("20231012_missErgmNetReest_Miss",
                   "20231012_missErgmNetReest_Miss",
                   "20231012_missErgmNetReest_Miss",
                   "20231012_missErgmNetReest_Miss",
                   "20231012_missErgmNetReest_Miss")

# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")

# loop for all specified networks 
for(chunkInd in 1:length(chunkNetworks)){
  
  # make sure the re-estimation lists are empty in the beginning
  modelResList = list()
  modelSeList = list()
  missModelList = list()
  propMissList = list()
  
  # set a specific index for the network
  chosenNetInd = chunkNetworks[chunkInd]
  
  # file list for the chosen network index
  chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
  
  ## regex the MNAR files 
  chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
  
  # combine the two indices
  combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
  
  # check to make sure there aren't any duplicates
  if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
    stop("Duplicate re-estimation datafiles! BAD.")
  }
  
  # loop to load
  chosenNetOutList = outputList[combinedNetOutList]
  
  for(fileInd in 1:length(chosenNetOutList)){
    # load all files
    load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
    
    # suppress these version difference warnings. 
    suppressWarnings({
    modelResList[[fileInd]] = modelres$coefficients
    modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
    
    })
    
    # slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
    
    # this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
    if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
     missModelList[[fileInd]] = 4
    } else {
     missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
    }
      
    # propmiss should be the same
  propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}

  # this can be a loop!
  for( chosenPara in 1:length(modelParaOptions)){
    
    # grab the chosen parameter name
    chosenParaName = modelParaOptions[chosenPara]
    
    # still need to take out true parameter values
    tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
    tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
    
    # including edges
    # test run of the plot function
    testPlotData = prepMissPlot(modelResList = modelResList,
                                modelSeList = modelSeList,
                                missModelList = missModelList,
                                propMissList = propMissList,
                                chosenProp = chunkMissProp,
                                chosenPara = chosenParaName)
    
    # brief inspection just to make sure nothing's wrong
    head(testPlotData) %>% 
      print()
    
    
    # and plot it
    missCaterpillarPlot(plotData = testPlotData,
                        truePara = tempTruePara,
                        trueSe = tempTrueSe,
                        chosenPara = chosenParaName,
                        chosenNetInd = chosenNetInd,
                        chosenProp = chunkMissProp,
                        chosenMiss = chosenMissValue) %>% 
      print()
    
  # make a temp rbias/rSe data structure to bind with the data frame
  tempRelBias = data.frame(rBias = (testPlotData$parameter - tempTruePara)/tempTruePara,
                           netInd = rep(chosenNetInd, nrow(testPlotData)),
                           missSave = rep(chosenMissLabel, nrow(testPlotData)),
                           propMiss = rep(chunkMissProp, nrow(testPlotData)),
                           missModel = testPlotData$missModel,
                           paraName = rep(chosenParaName, nrow(testPlotData)))
  tempRelSe = data.frame(rSe = (testPlotData$SE/tempTrueSe),
                          netInd = rep(chosenNetInd, nrow(testPlotData)),
                          missSave = rep(chosenMissLabel, nrow(testPlotData)),
                          propMiss = rep(chunkMissProp, nrow(testPlotData)),
                          missModel = testPlotData$missModel,
                          paraName = rep(chosenParaName, nrow(testPlotData)))

  # bind them
  relBiasData = rbind(relBiasData, tempRelBias)
  relSeData = rbind(relSeData, tempRelSe)  
  }

  

  
  
  # a temporary data frame from the output to be binded to the big data frame
  tempFailData = prepFailPlot(propMissList = propMissList,
                              missModelList = missModelList,
                              chosenNetInd = chosenNetInd,
                              chosenProp = chunkMissProp,
                              chosenMiss = chosenMissValue)
  
  # and bind it
  failRateData = rbind(failRateData, tempFailData)

}
##    parameter        SE missModel
## 31 -4.460967 0.5810290     Indep
## 17 -4.391896 0.6414831     Indep
## 21 -4.251427 0.7467956     Indep
## 34 -4.235010 0.7519899     Indep
## 18 -3.938098 0.7389818     Indep
## 1  -3.887855 0.7437958     Indep

##      parameter        SE missModel
## 20 -0.54170566 0.3171848     Indep
## 24 -0.31403746 0.3400528     Indep
## 7  -0.24488276 0.3682580     Indep
## 19 -0.22044346 0.2847240     Indep
## 3  -0.07633112 0.2590887     Indep
## 29 -0.05303975 0.2633457     Indep

##    parameter        SE missModel
## 15 0.1147738 0.1344550     Indep
## 2  0.2438565 0.1288911     Indep
## 17 0.2489000 0.1466968     Indep
## 12 0.2720479 0.1266465     Indep
## 28 0.2784234 0.1299738     Indep
## 23 0.2830477 0.1217562     Indep

##    parameter        SE missModel
## 34 -4.117064 0.7813483     Indep
## 8  -3.866036 0.8600851     Indep
## 22 -3.575605 0.8914025     Indep
## 2  -3.486708 1.0883342     Indep
## 41 -3.341053 1.1678912     Indep
## 35 -3.238912 0.8570279     Indep

##     parameter        SE missModel
## 11 -1.6951627 0.7913244     Indep
## 45 -1.0758965 0.4881540     Indep
## 16 -0.6649367 0.4877096     Indep
## 24 -0.5145593 0.5383474     Indep
## 5  -0.5060344 0.3733735     Indep
## 26 -0.3739797 0.5027186     Indep

##     parameter        SE missModel
## 28 0.02784400 0.1756171     Indep
## 42 0.03321241 0.2095743     Indep
## 2  0.07508869 0.1726614     Indep
## 25 0.11500553 0.1873731     Indep
## 34 0.12822292 0.1818427     Indep
## 1  0.13671639 0.1920777     Indep

##    parameter        SE missModel
## 21 -4.186214 0.8410843     Indep
## 15 -3.781792 0.7574983     Indep
## 39 -3.415426 0.7451268     Indep
## 30 -3.321968 0.8540362     Indep
## 40 -3.248794 0.9085335     Indep
## 14 -3.190952 0.7852334     Indep

##     parameter        SE missModel
## 31 -0.9245958 0.4509537     Indep
## 19 -0.7191358 0.4810619     Indep
## 17 -0.4522326 0.4244435     Indep
## 36 -0.4507429 0.4204314     Indep
## 9  -0.4244602 0.3580949     Indep
## 8  -0.3759830 0.3769310     Indep

##      parameter        SE missModel
## 21 -0.32176800 0.2684163     Indep
## 15  0.01944446 0.2657635     Indep
## 42  0.07392221 0.2709689     Indep
## 6   0.13021100 0.2911042     Indep
## 41  0.19774587 0.2389817     Indep
## 40  0.21448691 0.2095101     Indep

##       parameter         SE missModel
## 1     -4.749368  0.4281441     Indep
## 3     -4.298512  0.5375795     Indep
## 2     -4.171256  0.5552278     Indep
## 8 -17812.803613 10.6302992  mcarERGM
## 5     -4.781183  0.5044661  mcarERGM
## 4     -4.535313  0.5246310  mcarERGM

##   parameter        SE missModel
## 2 0.1902914 0.3616075     Indep
## 3 0.4397348 0.2924576     Indep
## 1 0.6479313 0.2579289     Indep
## 7 0.1221795 0.3665846  mcarERGM
## 6 0.3604831 0.3471392  mcarERGM
## 4 0.6110184 0.2724064  mcarERGM

##     parameter            SE missModel
## 1   0.3854443  3.015044e-01     Indep
## 3   0.4696144  3.230628e-01     Indep
## 2   0.8850460  4.268974e-01     Indep
## 8 -22.0702887 2.680862e+115  mcarERGM
## 5   0.1396263  3.000309e-01  mcarERGM
## 4   0.2854597  2.927772e-01  mcarERGM

##    parameter       SE missModel
## 4  -11.67967 1.761618     Indep
## 49 -11.60233 1.668491     Indep
## 19 -10.86087 1.509740     Indep
## 33 -10.81512 1.729544     Indep
## 34 -10.27515 1.710123     Indep
## 26 -10.15169 1.589956     Indep

##      parameter        SE missModel
## 41 -0.29712466 0.2683588     Indep
## 33 -0.20487467 0.2645884     Indep
## 24 -0.18751920 0.2627259     Indep
## 12 -0.14089183 0.2527441     Indep
## 2   0.01010340 0.2545368     Indep
## 5   0.02821066 0.2418109     Indep

##       parameter        SE missModel
## 50 -0.164454094 0.2209245     Indep
## 25 -0.131737953 0.1781773     Indep
## 18 -0.093227217 0.2367062     Indep
## 29 -0.082722105 0.2330193     Indep
## 42 -0.040174880 0.2351467     Indep
## 31  0.006732098 0.1901246     Indep

Sparse plots

The plots below are specifically plots with very sparse amounts of successful re-estimations.

10% Peter

# 10% missingness - Peter
# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", "Peter"))

# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 1

# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 1

# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]


# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = c(1)

# note: 
# net 3 is so messed up it has ONE converged estimation for this set 
# 20231010_missNetReest_net3_missModel1_prop1_trial9.RData

# handle fail rates separately. these data structures are very unstandardised.

# TODO: vector of file names for regex
chunkRdataNames = c("20231005_missNetReest_net")

chunkMnarNames = c("20231011_missErgmNetReest_Miss")

# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")

# loop for all specified networks 
for(chunkInd in 1:length(chunkNetworks)){
  
  # make sure the re-estimation lists are empty in the beginning
  modelResList = list()
  modelSeList = list()
  missModelList = list()
  propMissList = list()
  
  # set a specific index for the network
  chosenNetInd = chunkNetworks[chunkInd]
  
  # file list for the chosen network index
  chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
  
  ## regex the MNAR files 
  chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
  
  # combine the two indices
  combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
  
  # check to make sure there aren't any duplicates
  if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
    stop("Duplicate re-estimation datafiles! BAD.")
  }
  
  # loop to load
  chosenNetOutList = outputList[combinedNetOutList]
  
  for(fileInd in 1:length(chosenNetOutList)){
    # load all files
    load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
    
    # suppress these version difference warnings. 
    suppressWarnings({
    modelResList[[fileInd]] = modelres$coefficients
    modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
    
    })
    
    # slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
    
    # this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
    if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
     missModelList[[fileInd]] = 4
    } else {
     missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
    }
      
    # propmiss should be the same
    propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
  }
  
  # this can be a loop!
  for( chosenPara in 1:length(modelParaOptions)){
    
    # grab the chosen parameter name
    chosenParaName = modelParaOptions[chosenPara]
    
    # still need to take out true parameter values
    tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
    tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
    
    # including edges
    # test run of the plot function
    testPlotData = prepMissPlot(modelResList = modelResList,
                                modelSeList = modelSeList,
                                missModelList = missModelList,
                                propMissList = propMissList,
                                chosenProp = chunkMissProp,
                                chosenPara = chosenParaName)
    
    # brief inspection just to make sure nothing's wrong
    head(testPlotData) %>% 
      print()
    
    
    # and plot it
    missCaterpillarPlot(plotData = testPlotData,
                        truePara = tempTruePara,
                        trueSe = tempTrueSe,
                        chosenPara = chosenParaName,
                        chosenNetInd = chosenNetInd,
                        chosenProp = chunkMissProp,
                        chosenMiss = chosenMissValue) %>% 
      print()
  
  }
   
  # a temporary data frame from the output to be binded to the big data frame
  tempFailData = prepFailPlot(propMissList = propMissList,
                              missModelList = missModelList,
                              chosenNetInd = chosenNetInd,
                              chosenProp = chunkMissProp,
                              chosenMiss = chosenMissValue)

  # and bind it
  failRateData = rbind(failRateData, tempFailData)

}
##     parameter       SE missModel
## 1  0.06730704 2.831189  mnarErgm
## 6  0.41110006 2.700997  mnarErgm
## 10 0.44803684 3.233691  mnarErgm
## 7  0.62303532 3.283356  mnarErgm
## 8  1.28406049 3.326750  mnarErgm
## 9  1.40853051 3.443864  mnarErgm

##    parameter        SE missModel
## 5  -3.012581 1.1944335  mnarErgm
## 3  -2.811731 1.0764746  mnarErgm
## 11 -2.705572 0.9157626  mnarErgm
## 2  -2.677974 0.9916606  mnarErgm
## 4  -2.656809 0.9220835  mnarErgm
## 9  -2.590910 0.8378803  mnarErgm

##   parameter        SE missModel
## 5  3.275134 0.3421808  mnarErgm
## 9  3.344275 0.3413299  mnarErgm
## 2  3.398827 0.3541200  mnarErgm
## 8  3.406120 0.3646437  mnarErgm
## 3  3.407984 0.3722419  mnarErgm
## 4  3.423570 0.3405085  mnarErgm
## Warning: Removed 1 rows containing missing values (`geom_hline()`).

# the 'converged' model for net 3
# make sure the re-estimation lists are empty in the beginning
modelResList = list()
modelSeList = list()
missModelList = list()
propMissList = list()

# set a specific index for the network
chosenNetInd = 3

# file list for the chosen network index
chosenNetOutFiles = grep(paste("20231010_missNetReest_net", chosenNetInd, sep =""), outputList)


# loop to load
chosenNetOutList = outputList[chosenNetOutFiles]

for(fileInd in 1:length(chosenNetOutList)){
  # load all files
  load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
  
  # suppress these version difference warnings. 
  suppressWarnings({
  modelResList[[fileInd]] = modelres$coefficients
  modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
  
  })
  
   missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
    
  # propmiss should be the same
  propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}

  # this can be a loop!
  for( chosenPara in 1:length(modelParaOptions)){
    
    # grab the chosen parameter name
    chosenParaName = modelParaOptions[chosenPara]
    
    # still need to take out true parameter values
    tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
    tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
    
    # including edges
    # test run of the plot function
    testPlotData = prepMissPlot(modelResList = modelResList,
                                modelSeList = modelSeList,
                                missModelList = missModelList,
                                propMissList = propMissList,
                                chosenProp = chunkMissProp,
                                chosenPara = chosenParaName)
    
    # brief inspection just to make sure nothing's wrong
    head(testPlotData) %>% 
      print()
    
    
    # and plot it
    missCaterpillarPlot(plotData = testPlotData,
                        truePara = tempTruePara,
                        trueSe = tempTrueSe,
                        chosenPara = chosenParaName,
                        chosenNetInd = chosenNetInd,
                        chosenProp = chunkMissProp,
                        chosenMiss = chosenMissValue) %>% 
      print()
  
  }
##   parameter SE missModel
## 1 -89.26956 NA     Indep

##   parameter SE missModel
## 1   21.2504 NA     Indep

##   parameter SE missModel
## 1  2.785266 NA     Indep

# a temporary data frame from the output to be binded to the big data frame
tempFailData = prepFailPlot(propMissList = propMissList,
                            missModelList = missModelList,
                            chosenNetInd = chosenNetInd,
                            chosenProp = chunkMissProp,
                            chosenMiss = chosenMissValue)

# and bind it
failRateData = rbind(failRateData, tempFailData)

35% Peter

# 35% missingness
# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", "Peter"))

# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 1

# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 3
chunkMissPropFloat = 3.5 # specifically for 35% missingness because I regex by taking the first number next to 'prop'.

# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]


# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = c(1)

# note: 
# net 3 is still so messed up it has three converged estimation for this set (out of 200)
# 20231010_missNetReest_net3_missModel2_prop3.5_trial11.RData
# 20231010_missNetReest_net3_missModel3_prop3.5_trial12.RData
# 20231010_missNetReest_net3_missModel3_prop3.5_trial19.RData

# no mnars, so the loop won't work.

# handle fail rates separately. these data structures are very unstandardised.

# TODO: vector of file names for regex
chunkRdataNames = c("20231005_missNetReest_net")

chunkMnarNames = c("20231011_missErgmNetReest_Miss")

# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")

# loop for all specified networks 
for(chunkInd in 1:length(chunkNetworks)){
  
  # make sure the re-estimation lists are empty in the beginning
  modelResList = list()
  modelSeList = list()
  missModelList = list()
  propMissList = list()
  
  # set a specific index for the network
  chosenNetInd = chunkNetworks[chunkInd]
  
  # file list for the chosen network index
  chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
  
  ## regex the MNAR files 
  chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
  
  # combine the two indices
  combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
  
  # check to make sure there aren't any duplicates
  if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
    stop("Duplicate re-estimation datafiles! BAD.")
  }
  
  # loop to load
  chosenNetOutList = outputList[combinedNetOutList]
  
  for(fileInd in 1:length(chosenNetOutList)){
    # load all files
    load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
    
    # suppress these version difference warnings. 
    suppressWarnings({
    modelResList[[fileInd]] = modelres$coefficients
    modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
    
    })
    
    # slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
    
    # this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
    if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
     missModelList[[fileInd]] = 4
    } else {
     missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
    }
      
    # propmiss should be the same
    propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
  }
  
  # this can be a loop!
  for( chosenPara in 1:length(modelParaOptions)){
    
    # grab the chosen parameter name
    chosenParaName = modelParaOptions[chosenPara]
    
    # still need to take out true parameter values
    tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
    tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
    
    # including edges
    # test run of the plot function
    testPlotData = prepMissPlot(modelResList = modelResList,
                                modelSeList = modelSeList,
                                missModelList = missModelList,
                                propMissList = propMissList,
                                chosenProp = chunkMissProp,
                                chosenPara = chosenParaName)
    
    # brief inspection just to make sure nothing's wrong
    head(testPlotData) %>% 
      print()
    
    
    # and plot it
    missCaterpillarPlot(plotData = testPlotData,
                        truePara = tempTruePara,
                        trueSe = tempTrueSe,
                        chosenPara = chosenParaName,
                        chosenNetInd = chosenNetInd,
                        chosenProp = chunkMissPropFloat,
                        chosenMiss = chosenMissValue) %>% 
      print()
  
  }
   
  # a temporary data frame from the output to be binded to the big data frame
  tempFailData = prepFailPlot(propMissList = propMissList,
                              missModelList = missModelList,
                              chosenNetInd = chosenNetInd,
                              chosenProp = chunkMissProp,
                              chosenMiss = chosenMissValue)

  # and bind it
  failRateData = rbind(failRateData, tempFailData)

}
##    parameter       SE missModel
## 3   1.354497 2.909963  mnarErgm
## 18  2.906806 2.973310  mnarErgm
## 13  2.981297 3.469670  mnarErgm
## 9   3.067972 3.937823  mnarErgm
## 2   3.332524 3.476768  mnarErgm
## 4   3.417883 3.512137  mnarErgm

##    parameter       SE missModel
## 10 -4.981270 1.855869  mnarErgm
## 20 -4.887841 1.760132  mnarErgm
## 6  -4.644249 1.794332  mnarErgm
## 8  -4.517936 1.729852  mnarErgm
## 16 -4.301038 1.535839  mnarErgm
## 11 -4.210703 1.294873  mnarErgm

##    parameter        SE missModel
## 12  1.962097 0.2659402  mnarErgm
## 16  2.015492 0.2764007  mnarErgm
## 11  2.038997 0.2944418  mnarErgm
## 19  2.079809 0.3464684  mnarErgm
## 7   2.083504 0.3216842  mnarErgm
## 6   2.096097 0.2653546  mnarErgm
## Warning: Removed 1 rows containing missing values (`geom_hline()`).

### A section specifically for the three re-estimations of net3 that made it
# no MNAR files, so removing them from the code below

# make sure the re-estimation lists are empty in the beginning
modelResList = list()
modelSeList = list()
missModelList = list()
propMissList = list()

# set a specific index for the network
chosenNetInd = 3

# file list for the chosen network index
chosenNetOutFiles = grep(paste("20231010_missNetReest_net", chosenNetInd, sep =""), outputList)


# loop to load
chosenNetOutList = outputList[chosenNetOutFiles]

for(fileInd in 1:length(chosenNetOutList)){
  # load all files
  load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
  
  # suppress these version difference warnings. 
  suppressWarnings({
  modelResList[[fileInd]] = modelres$coefficients
  modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
  
  })
  
   missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
    
  # propmiss should be the same
  propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}

# this can be a loop!
for( chosenPara in 1:length(modelParaOptions)){
  
  # grab the chosen parameter name
  chosenParaName = modelParaOptions[chosenPara]
  
  # still need to take out true parameter values
  tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
  tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
  
  # including edges
  # test run of the plot function
  testPlotData = prepMissPlot(modelResList = modelResList,
                              modelSeList = modelSeList,
                              missModelList = missModelList,
                              propMissList = propMissList,
                              chosenProp = chunkMissProp,
                              chosenPara = chosenParaName)
  
  # brief inspection just to make sure nothing's wrong
  head(testPlotData) %>% 
    print()
  
  
  # and plot it
  missCaterpillarPlot(plotData = testPlotData,
                      truePara = tempTruePara,
                      trueSe = tempTrueSe,
                      chosenPara = chosenParaName,
                      chosenNetInd = chosenNetInd,
                      chosenProp = chunkMissPropFloat,
                      chosenMiss = chosenMissValue) %>% 
    print()

}
##   parameter SE missModel
## 1 -59.62898 NA  mcarERGM
## 2 -31.73605 NA    Latent
## 3 -17.05925 NA    Latent

##   parameter SE missModel
## 1 15.661456 NA  mcarERGM
## 2  1.350935 NA    Latent
## 3  5.424564 NA    Latent

##    parameter SE missModel
## 1 -0.2770482 NA  mcarERGM
## 3 -1.1609499 NA    Latent
## 2 12.4001035 NA    Latent

# a temporary data frame from the output to be binded to the big data frame
tempFailData = prepFailPlot(propMissList = propMissList,
                            missModelList = missModelList,
                            chosenNetInd = chosenNetInd,
                            chosenProp = chunkMissProp,
                            chosenMiss = chosenMissValue)

# and bind it
failRateData = rbind(failRateData, tempFailData)

60% Peter

# 60% Peter
# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", "Peter"))

# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 1

# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 6

# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]


# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = c(1)

# note: 
# net 3 is still so messed up it has two converged estimation for this set (out of 200)
# 20231011_missNetReest_net3_missModel2_prop6_trial40
# 20231011_missNetReest_net3_missModel3_prop6_trial46

# no mnars, so the loop won't work.

# handle fail rates separately. these data structures are very unstandardised.

# TODO: vector of file names for regex
chunkRdataNames = c("20231005_missNetReest_net")

chunkMnarNames = c("20231012_missErgmNetReest_Miss")

# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")

# loop for all specified networks 
for(chunkInd in 1:length(chunkNetworks)){
  
  # make sure the re-estimation lists are empty in the beginning
  modelResList = list()
  modelSeList = list()
  missModelList = list()
  propMissList = list()
  
  # set a specific index for the network
  chosenNetInd = chunkNetworks[chunkInd]
  
  # file list for the chosen network index
  chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
  
  ## regex the MNAR files 
  chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
  
  # combine the two indices
  combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
  
  # check to make sure there aren't any duplicates
  if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
    stop("Duplicate re-estimation datafiles! BAD.")
  }
  
  # loop to load
  chosenNetOutList = outputList[combinedNetOutList]
  
  for(fileInd in 1:length(chosenNetOutList)){
    # load all files
    load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
    
    # suppress these version difference warnings. 
    suppressWarnings({
    modelResList[[fileInd]] = modelres$coefficients
    modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
    
    })
    
    # slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
    
    # this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
    if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
     missModelList[[fileInd]] = 4
    } else {
     missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
    }
      
    # propmiss should be the same
    propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
  }
  
  # this can be a loop!
  for( chosenPara in 1:length(modelParaOptions)){
    
    # grab the chosen parameter name
    chosenParaName = modelParaOptions[chosenPara]
    
    # still need to take out true parameter values
    tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
    tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
    
    # including edges
    # test run of the plot function
    testPlotData = prepMissPlot(modelResList = modelResList,
                                modelSeList = modelSeList,
                                missModelList = missModelList,
                                propMissList = propMissList,
                                chosenProp = chunkMissProp,
                                chosenPara = chosenParaName)
    
    # brief inspection just to make sure nothing's wrong
    head(testPlotData) %>% 
      print()
    
    
    # and plot it
    missCaterpillarPlot(plotData = testPlotData,
                        truePara = tempTruePara,
                        trueSe = tempTrueSe,
                        chosenPara = chosenParaName,
                        chosenNetInd = chosenNetInd,
                        chosenProp = chunkMissProp,
                        chosenMiss = chosenMissValue) %>% 
      print()
  
  }
   
  # a temporary data frame from the output to be binded to the big data frame
  tempFailData = prepFailPlot(propMissList = propMissList,
                              missModelList = missModelList,
                              chosenNetInd = chosenNetInd,
                              chosenProp = chunkMissProp,
                              chosenMiss = chosenMissValue)

  # and bind it
  failRateData = rbind(failRateData, tempFailData)

}
##     parameter       SE missModel
## 1  -19.471644       NA     Indep
## 2   -5.164890 1.341253     Indep
## 3  -12.764592       NA  mcarERGM
## 12  -4.443987 1.069731  mnarErgm
## 8   -4.212305 1.364534  mnarErgm
## 26  -4.030651 1.201752  mnarErgm

##     parameter         SE missModel
## 2   -1.676416  0.2491544     Indep
## 1    3.753020         NA     Indep
## 3    2.523763         NA  mcarERGM
## 24 -16.309377 12.3213518  mnarErgm
## 31  -7.352189  4.5866855  mnarErgm
## 13  -6.388176  3.7709916  mnarErgm

##    parameter        SE missModel
## 1   2.895285        NA     Indep
## 2   4.855019 0.6582781     Indep
## 3   1.836877        NA  mcarERGM
## 20  1.610596 0.6224760  mnarErgm
## 30  1.847450 0.3734943  mnarErgm
## 24  1.860921 0.2396976  mnarErgm

### A section specifically for the three re-estimations of net3 that made it
# no MNAR files, so removing them from the code below

# make sure the re-estimation lists are empty in the beginning
modelResList = list()
modelSeList = list()
missModelList = list()
propMissList = list()

# set a specific index for the network
chosenNetInd = 3

# file list for the chosen network index
chosenNetOutFiles = grep(paste("20231011_missNetReest_net", chosenNetInd, sep =""), outputList)


# loop to load
chosenNetOutList = outputList[chosenNetOutFiles]

for(fileInd in 1:length(chosenNetOutList)){
  # load all files
  load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
  
  # suppress these version difference warnings. 
  suppressWarnings({
  modelResList[[fileInd]] = modelres$coefficients
  modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
  
  })
  
   missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
    
  # propmiss should be the same
  propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}

# this can be a loop!
for( chosenPara in 1:length(modelParaOptions)){
  
  # grab the chosen parameter name
  chosenParaName = modelParaOptions[chosenPara]
  
  # still need to take out true parameter values
  tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
  tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
  
  # including edges
  # test run of the plot function
  testPlotData = prepMissPlot(modelResList = modelResList,
                              modelSeList = modelSeList,
                              missModelList = missModelList,
                              propMissList = propMissList,
                              chosenProp = chunkMissProp,
                              chosenPara = chosenParaName)
  
  # brief inspection just to make sure nothing's wrong
  head(testPlotData) %>% 
    print()
  
  
  # and plot it
  missCaterpillarPlot(plotData = testPlotData,
                      truePara = tempTruePara,
                      trueSe = tempTrueSe,
                      chosenPara = chosenParaName,
                      chosenNetInd = chosenNetInd,
                      chosenProp = chunkMissProp,
                      chosenMiss = chosenMissValue) %>% 
    print()

}
##    parameter SE missModel
## 1  -5.022512 NA  mcarERGM
## 2 -62.259251 NA    Latent

##   parameter SE missModel
## 1  2.548622 NA  mcarERGM
## 2 17.492617 NA    Latent

##   parameter SE missModel
## 1 -1.027786 NA  mcarERGM
## 2 -2.287233 NA    Latent

# a temporary data frame from the output to be binded to the big data frame
tempFailData = prepFailPlot(propMissList = propMissList,
                            missModelList = missModelList,
                            chosenNetInd = chosenNetInd,
                            chosenProp = chunkMissProp,
                            chosenMiss = chosenMissValue)

# and bind it
failRateData = rbind(failRateData, tempFailData)

10% Todd

Literally nothing. No successful re-estimations for any missingness model for net 3.

35% Todd

# 35% missingness - Todd
# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", "TOdd"))

# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 2

# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 3
chunkMissPropFloat = 3.5


# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]


# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = 3

# handle fail rates separately. these data structures are very unstandardised.

# TODO: vector of file names for regex
chunkRdataNames = c("20231005_missNetReest_net")

chunkMnarNames = c("20231012_missErgmNetReest_Miss")

# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")

# loop for all specified networks 
for(chunkInd in 1:length(chunkNetworks)){
  
  # make sure the re-estimation lists are empty in the beginning
  modelResList = list()
  modelSeList = list()
  missModelList = list()
  propMissList = list()
  
  # set a specific index for the network
  chosenNetInd = chunkNetworks[chunkInd]
  
  # file list for the chosen network index
  chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
  
  ## regex the MNAR files 
  chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
  
  # combine the two indices
  combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
  
  # check to make sure there aren't any duplicates
  if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
    stop("Duplicate re-estimation datafiles! BAD.")
  }
  
  # loop to load
  chosenNetOutList = outputList[combinedNetOutList]
  
  for(fileInd in 1:length(chosenNetOutList)){
    # load all files
    load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
    
    # suppress these version difference warnings. 
    suppressWarnings({
    modelResList[[fileInd]] = modelres$coefficients
    modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
    
    })
    
    # slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
    
    # this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
    if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
     missModelList[[fileInd]] = 4
    } else {
     missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
    }
      
    # propmiss should be the same
    propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
  }
  
  # this can be a loop!
  for( chosenPara in 1:length(modelParaOptions)){
    
    # grab the chosen parameter name
    chosenParaName = modelParaOptions[chosenPara]
    
    # still need to take out true parameter values
    tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
    tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
    
    # including edges
    # test run of the plot function
    testPlotData = prepMissPlot(modelResList = modelResList,
                                modelSeList = modelSeList,
                                missModelList = missModelList,
                                propMissList = propMissList,
                                chosenProp = chunkMissProp,
                                chosenPara = chosenParaName)
    
    # brief inspection just to make sure nothing's wrong
    head(testPlotData) %>% 
      print()
    
    
    # and plot it
    missCaterpillarPlot(plotData = testPlotData,
                        truePara = tempTruePara,
                        trueSe = tempTrueSe,
                        chosenPara = chosenParaName,
                        chosenNetInd = chosenNetInd,
                        chosenProp = chunkMissPropFloat,
                        chosenMiss = chosenMissValue) %>% 
      print()
  
  }
   
  # a temporary data frame from the output to be binded to the big data frame
  tempFailData = prepFailPlot(propMissList = propMissList,
                              missModelList = missModelList,
                              chosenNetInd = chosenNetInd,
                              chosenProp = chunkMissProp,
                              chosenMiss = chosenMissValue)

  # and bind it
  failRateData = rbind(failRateData, tempFailData)

}
##    parameter       SE missModel
## 1 -2.2089033 1.681170     Indep
## 2 -2.7527563 1.471588    Latent
## 3  0.9039891 1.649547  mnarErgm

##   parameter        SE missModel
## 1 -1.112195 0.5174151     Indep
## 2 -1.248764 0.4822099    Latent
## 3 -1.302105 0.4873029  mnarErgm
## Warning: Removed 1 rows containing missing values (`geom_hline()`).

##   parameter        SE missModel
## 1  2.437951 0.4559972     Indep
## 2  2.961205 0.5207720    Latent
## 3  1.119011 0.2363328  mnarErgm
## Warning: Removed 1 rows containing missing values (`geom_hline()`).

60% Todd

# 35% missingness - Todd
# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", "TOdd"))

# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 2

# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 6


# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]


# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = 3

# note: 
# net 3 is so messed up it has ONE converged estimation for this set 
# 20231010_missNetReest_net3_missModel1_prop1_trial9.RData

# handle fail rates separately. these data structures are very unstandardised.

# TODO: vector of file names for regex
chunkRdataNames = c("20231006_missNetReest_net")

chunkMnarNames = c("20231012_missErgmNetReest_Miss")

# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")

# loop for all specified networks 
for(chunkInd in 1:length(chunkNetworks)){
  
  # make sure the re-estimation lists are empty in the beginning
  modelResList = list()
  modelSeList = list()
  missModelList = list()
  propMissList = list()
  
  # set a specific index for the network
  chosenNetInd = chunkNetworks[chunkInd]
  
  # file list for the chosen network index
  chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
  
  ## regex the MNAR files 
  chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
  
  # combine the two indices
  combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
  
  # check to make sure there aren't any duplicates
  if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
    stop("Duplicate re-estimation datafiles! BAD.")
  }
  
  # loop to load
  chosenNetOutList = outputList[combinedNetOutList]
  
  for(fileInd in 1:length(chosenNetOutList)){
    # load all files
    load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
    
    # suppress these version difference warnings. 
    suppressWarnings({
    modelResList[[fileInd]] = modelres$coefficients
    modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
    
    })
    
    # slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
    
    # this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
    if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
     missModelList[[fileInd]] = 4
    } else {
     missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
    }
      
    # propmiss should be the same
    propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
  }
  
  # this can be a loop!
  for( chosenPara in 1:length(modelParaOptions)){
    
    # grab the chosen parameter name
    chosenParaName = modelParaOptions[chosenPara]
    
    # still need to take out true parameter values
    tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
    tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
    
    # including edges
    # test run of the plot function
    testPlotData = prepMissPlot(modelResList = modelResList,
                                modelSeList = modelSeList,
                                missModelList = missModelList,
                                propMissList = propMissList,
                                chosenProp = chunkMissProp,
                                chosenPara = chosenParaName)
    
    # brief inspection just to make sure nothing's wrong
    head(testPlotData) %>% 
      print()
    
    
    # and plot it
    missCaterpillarPlot(plotData = testPlotData,
                        truePara = tempTruePara,
                        trueSe = tempTrueSe,
                        chosenPara = chosenParaName,
                        chosenNetInd = chosenNetInd,
                        chosenProp = chunkMissProp,
                        chosenMiss = chosenMissValue) %>% 
      print()
  
  }

  # a temporary data frame from the output to be binded to the big data frame
  tempFailData = prepFailPlot(propMissList = propMissList,
                              missModelList = missModelList,
                              chosenNetInd = chosenNetInd,
                              chosenProp = chunkMissProp,
                              chosenMiss = chosenMissValue)

  # and bind it
  failRateData = rbind(failRateData, tempFailData)

}
##    parameter        SE missModel
## 1  -3.962095 0.9023328  mcarERGM
## 2  -1.183762 1.5346827    Latent
## 3 -14.485997        NA  mnarErgm

##    parameter        SE missModel
## 1  0.6043238 0.3158800  mcarERGM
## 2 -0.3741873 0.4741993    Latent
## 3 -1.6860124        NA  mnarErgm

##   parameter        SE missModel
## 1 0.1417893 0.1547958  mcarERGM
## 2 0.5251103 0.1820051    Latent
## 3 9.7456273        NA  mnarErgm

Failure rate plot

Wrangling

# plotting the failure rate
# data frame for the completely failed missingness combinations
# just for the formatting
tempFailData = failRateData[1,]

# todd 10s
tempFailData[1, ] = c(1, 3, 1, 1, "0")
tempFailData[2, ] = c(1, 3, 2, 1, "0")
tempFailData[3, ] = c(1, 3, 3, 1, "0")
tempFailData[4, ] = c(1, 3, 4, 1, "0")

# peter 10s
tempFailData[5, ] = c(1, 1, 1, 1, "NA")
tempFailData[6, ] = c(1, 1, 2, 1, "NA")
tempFailData[7, ] = c(1, 1, 3, 1, "NA")
tempFailData[8, ] = c(1, 3, 2, 1, "NA")
tempFailData[9, ] = c(1, 3, 3, 1, "NA")
tempFailData[10, ] = c(1, 3, 4, 1, "NA")

# todd 35s
tempFailData[11, ] = c(1, 3, 2, 3, "0")

# peter 35s
tempFailData[12, ] = c(1, 1, 1, 3, "NA")
tempFailData[13, ] = c(1, 1, 2, 3, "NA")
tempFailData[14, ] = c(1, 1, 3, 3, "NA")
tempFailData[15, ] = c(1, 3, 1, 3, "NA")
tempFailData[16, ] = c(1, 3, 4, 3, "NA")

# todd 60
tempFailData[17, ] = c(1, 3, 1, 6, "0")

# peter 60
tempFailData[18, ] = c(1, 1, 3, 6, "NA")
tempFailData[19, ] = c(1, 3, 1, 6, "NA")
tempFailData[20, ] = c(1, 3, 4, 6, "NA")

# object type stuff
tempFailData$failRate = as.numeric(tempFailData$failRate) 
tempFailData$netInd = as.numeric(tempFailData$netInd) 
tempFailData$missModel = as.numeric(tempFailData$missModel) 
tempFailData$missProp = as.numeric(tempFailData$missProp) 

# and bind
failRateData = rbind(failRateData, tempFailData)

# check for duplicates
if(any(duplicated(failRateData))){
  
  # take only distinct elements
  failRateData = distinct(failRateData)
}



# give the fail rate data (keep it untouched) a unique variable for the combination of missingness
failRatePlotData = failRateData %>% 
  group_by(missProp, missSave) %>% 
  mutate(missCondition = cur_group_id()) %>%
  ungroup()


# turn variables into factors for plotting
failRatePlotData$missModel = factor(failRatePlotData$missModel, levels = c(1,2,3,4), labels = c("Indep", "mcarERGM", "Latent", "mnarErgm"))
failRatePlotData$missCondition =  factor(failRatePlotData$missCondition, levels = c(2,4,6,1,3,5), labels = c("Miss10", "Miss35", "Miss60", "Zero10", "Zero35", "Zero60"))

Plotting

# subsetting per network because between-network comparisons aren't too informative
# within network comparisons where I have a specific group index for every combination of missSave and missProp (6 categories total)

selectedNets = c(1,2,3,4,5,6)

for(chosenNetInd in selectedNets){

# start with a subset of net 2
failRateSubset = failRatePlotData %>% 
  filter(netInd == chosenNetInd)

# and now we have plot-ready data
print(ggplot(failRateSubset, aes(fill = missModel, y = failRate, x = missCondition)) + 
  geom_bar(position="dodge", stat="identity") + 
  xlab("Missingness condition") + 
  ylab("Failure rate") + 
  ggtitle(paste("Failure rate - Net", chosenNetInd)) + 
  theme_classic() + 
  theme(axis.ticks.x = element_blank()) +
  ylim(0,1) )
  

}

Relative metrics

Relative bias was defined as:

\[rBias = \frac{\widetilde{\theta} - \theta}{\theta},\]

with \(\widetilde{\theta}\) representing the re-estimated parameter value and \(\theta\) representing the true parameter value.

The relative standard error is defined as: \[rSe = \frac{SE(\widetilde{\theta})}{SE(\theta)}.\]

# change some variable types
relBiasData$netInd = as.factor(relBiasData$netInd)
relSeData$netInd = as.factor(relSeData$netInd)

# rename Peter and Todd to Miss and Zero respectively
library(stringr)
relBiasData$missSave = str_replace(string = relBiasData$missSave,
                                   pattern = "Peter",
                                   replacement = "Miss")


relBiasData$missSave = str_replace(string = relBiasData$missSave,
                                   pattern = "Todd",
                                   replacement = "Zero")

# and for standard errors
relSeData$missSave = str_replace(string = relSeData$missSave,
                                   pattern = "Peter",
                                   replacement = "Miss")


relSeData$missSave = str_replace(string = relSeData$missSave,
                                   pattern = "Todd",
                                   replacement = "Zero")

missPropChoices = c(1, 3, 6)
paraChoices = c("edges", "altStar", "gwesp")

# loop it

for(chosenProp in missPropChoices){
  for(chosenPara in paraChoices){
    
    # I don't even know what kind of plot this is...
    # but I need to do some wrangling and subsetting.
    # need to subset some stuff
    relBiasSub = relBiasData %>% 
      filter(propMiss == chosenProp & paraName == chosenPara)
    
    # use the 2.5% and 97.5% quantiles for the axis limits because some of them have some VERY extreme values
    tempBiasBounds = c(quantile(relBiasSub$rBias, probs = c(0.025, 0.975)))
    
    
    # so the plot would be something like
    relBiasPlot = ggplot(relBiasSub, aes(x = netInd, y = rBias, fill = missModel)) + 
                    geom_boxplot(position = position_dodge()) + 
                    facet_wrap(~missSave) + 
                    theme_classic() + 
                    geom_hline(yintercept = 0, col = "black", lty = 2) + 
                    ylim((tempBiasBounds + c(-0.5, 0.5))) +          # and add a bit of wiggle room
                    xlab("Network no.") + 
                    ylab("Relative bias") + 
                    ggtitle(paste("Proportion ", ifelse(chosenProp == 3, yes = 3.5*10, no = chosenProp*10), "% - ", chosenPara, sep = "")) # the ifelse is because of how I've regexed 35% missingness
    
    # print plot
    print(relBiasPlot)
    
    # and another one for variance (technically relative standard error... but yeah)
    relSeSub = relSeData %>% 
      filter(propMiss == chosenProp & paraName == chosenPara)
    
    # just the 97.5% quantile because this is strictly positive
    tempVarUpper = quantile(relSeSub$rSe, probs = c(0.975), na.rm = TRUE)
    
    # so the plot would be something like
    relSePlot = ggplot(relSeSub, aes(x = netInd, y = rSe, fill = missModel)) + 
                    geom_boxplot(position = position_dodge()) + 
                    facet_wrap(~missSave) + 
                    theme_classic() + 
                    geom_hline(yintercept = 1, col = "black", lty = 2) + 
                    ylim(0, tempVarUpper + 0.5) +         # some wiggle room
                    xlab("Network no.") + 
                    ylab("Relative standard error") + 
                    ggtitle(paste("Proportion ", ifelse(chosenProp == 3, yes = 3.5*10, no = chosenProp*10), "% - ", chosenPara, sep = "")) # the ifelse is because of how I've regexed 35% missingness
    
    # print plot
    print(relSePlot)
  
  }
}
## Warning: Removed 40 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 2 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 36 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 1 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 5 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 47 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 25 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 51 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 26 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 19 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 38 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 44 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 35 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 50 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 45 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 23 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 76 rows containing non-finite values (`stat_boxplot()`).

# a repeat without the imposed axes just for comparison
for(chosenProp in missPropChoices){
  for(chosenPara in paraChoices){
    
    # I don't even know what kind of plot this is...
    # but I need to do some wrangling and subsetting.
    # need to subset some stuff
    relBiasSub = relBiasData %>% 
      filter(propMiss == chosenProp & paraName == chosenPara)
    
    # so the plot would be something like
    relBiasPlot = ggplot(relBiasSub, aes(x = netInd, y = rBias, fill = missModel)) + 
                    geom_boxplot(position = position_dodge()) + 
                    facet_wrap(~missSave) + 
                    theme_classic() + 
                    geom_hline(yintercept = 0, col = "black", lty = 2) + 
                    xlab("Network no.") + 
                    ylab("Relative bias") + 
                    ggtitle(paste("Proportion ", ifelse(chosenProp == 3, yes = 3.5*10, no = chosenProp*10), "% - ", chosenPara, sep = "")) # the ifelse is because of how I've regexed 35% missingness
    
    # print plot
    print(relBiasPlot)
    
    # and another one for variance (technically relative standard error... but yeah)
    relSeSub = relSeData %>% 
      filter(propMiss == chosenProp & paraName == chosenPara)
    
    # so the plot would be something like
    relSePlot = ggplot(relSeSub, aes(x = netInd, y = rSe, fill = missModel)) + 
                    geom_boxplot(position = position_dodge()) + 
                    facet_wrap(~missSave) + 
                    theme_classic() + 
                    geom_hline(yintercept = 1, col = "black", lty = 2) + 
                    xlab("Network no.") + 
                    ylab("Relative standard error") + 
                    ggtitle(paste("Proportion ", ifelse(chosenProp == 3, yes = 3.5*10, no = chosenProp*10), "% - ", chosenPara, sep = "")) # the ifelse is because of how I've regexed 35% missingness
    
    # print plot
    print(relSePlot)
  
  }
}

## Warning: Removed 1 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 1 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 1 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 2 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 2 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 6 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 9 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 17 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 49 rows containing non-finite values (`stat_boxplot()`).

Entrainment inspection

As we see in the MNAR ERGM, the parameter estimates are biased in an intuitve fashion when we consider the value of the coefficient for the entrainment parameter.

This section (and estimates) are really to look at how differences in the entrainment parameter can affect parameter estimates and potentially various metrics.

# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", "Peter"))

# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 1

# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 3
chunkMissPropFloat = 3.5 # specifically for 35% missingness because I regex by taking the first number next to 'prop'.

# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]


# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
# this section only has one network, but I need multiple coefficient sets.
chunkNetworks = 4
chunkCoefSets = c(7, 8, 9, 10, 11, 12, 13, 14, 15)

# TODO: vector of file names for regex
chunkMnarNames = c("20240227_missErgmNetReest_Miss",
                   "20240306_missErgmNetReest_Miss")

# ... add a name indicator to choose what the file name is
nameInd = c(1, 1, 1, 1, 1, 2, 2, 2, 2)

# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")

# make sure the re-estimation lists are empty in the beginning
modelResList = list()
modelSeList = list()
missModelList = list()
propMissList = list()
coefSetList = list()

# set a specific index for the network
chosenNetInd = chunkNetworks[chunkInd]

## regex the MNAR files 
# combine the two indices, netOut isnt used because only MNAR ergms are used here (technically not 9 because 0, but yeah.)
combinedNetOutList = c(grep(paste(chunkMnarNames[1], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList),
                       grep(paste(chunkMnarNames[2], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList))

# check to make sure there aren't any duplicates
if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
  stop("Duplicate re-estimation datafiles! BAD.")
}

# loop to load
chosenNetOutList = outputList[combinedNetOutList]

for(fileInd in 1:length(chosenNetOutList)){
  # load all files
  load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
  
  # suppress these version difference warnings. 
  suppressWarnings({
  modelResList[[fileInd]] = modelres$coefficients
  modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
  })
  
  # always 'MNAR'. Don't think this'll be used but yeah
  missModelList[[fileInd]] = 4
  
  # and always 35% missingness
  propMissList[[fileInd]] = 3
  
  # I do however need the coefset list
  coefSetList[[fileInd]] = as.integer(sub(".*?_coefSet.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
  
  }





# we now have all the objects we need
# let's check how many of each coefset made it
table(unlist(coefSetList))
## 
##  7  8  9 10 11 12 13 14 15 
## 19 34 42 43 44 27 34 38 41
# and now figure out how we're plotting this
# first we need to know which coefsets refer to what value
entrValue = c(-1, -0.5, 0, 0.5, 1, -0.75, -0.25, 0.25, 0.75)

# initialise the final plot data
entrPlotData = tibble()

# next we want to take out the re-estimated coefficients
# this can be a loop!
  for( chosenPara in 1:length(modelParaOptions)){
    
    # grab the chosen parameter name
    chosenParaName = modelParaOptions[chosenPara]
    
    # including edges
    # I'm bastardising the prepMissPlot function to include the coef set index
    ## Check to make sure all the lists are of the same length
    listLengths = lapply(list(modelResList, modelSeList, missModelList, propMissList, coefSetList), FUN = length)
    
    ## A unit test that spits out an error if there's more than one unique length in the list of lengths
    if( length(unique(listLengths)) != 1 ){
      stop("Lists have differing lengths")
    }
    
    # subset the lists to only be a single chosen proportion
    chosenPropIndex = propMissList == 3
    
    modelResChosenPropList = modelResList[chosenPropIndex]
    modelSeChosenPropList = modelSeList[chosenPropIndex]
    missModelChosenPropList = missModelList[chosenPropIndex]

    # Specify indices for the chosen parameter
    # doing it this way because parameter names get very lengthy
    paraIndex = c(1:3)
    modelParaNames = c("edges", "altStar", "gwesp")
    chosenParaIndex = paraIndex[modelParaNames == chosenParaName]
    
    
    # making the dataset
    plotData = data.frame(parameter = c(unlist(lapply(modelResChosenPropList, `[[`, chosenParaIndex) )), 
                          SE = c(unlist(lapply(modelSeChosenPropList, `[[`, chosenParaIndex))),
                          missModel = factor(c(unlist(missModelChosenPropList)), levels = c(1,2,3,4), labels = c("Indep", "mcarERGM", "Latent", "mnarErgm")),
                          coefSet = unlist(coefSetList))
      
    # brief inspection just to make sure nothing's wrong
    head(plotData) %>% 
      print()
      
      
    # now that we have the 'raw' plot data for a given parameter, let's transform it to a mean and 95% CI to plot
    # basically for each coefset, I want a mean and 95% CI
    tempEntrPlotData = plotData %>% 
      group_by(coefSet) %>%
      summarize(Mean = mean(parameter),
                Lower = quantile(parameter, probs = 0.025),
                Upper = quantile(parameter, probs = 0.975)) %>% 
      add_column(Parameter = chosenParaName)
    
    # and bind the temp datasets
    entrPlotData = bind_rows(entrPlotData, tempEntrPlotData)

  }
##     parameter       SE missModel coefSet
## 1 -0.66292277 1.776960  mnarErgm      10
## 2  3.53737524 5.521055  mnarErgm      10
## 3 -0.01855624 2.498809  mnarErgm      10
## 4  0.64028293 2.548163  mnarErgm      10
## 5 -0.22500862 1.847154  mnarErgm      10
## 6  0.63681549 2.359213  mnarErgm      10
##   parameter        SE missModel coefSet
## 1 -1.420789 0.4377770  mnarErgm      10
## 2 -2.765767 1.3530311  mnarErgm      10
## 3 -1.875441 0.6112890  mnarErgm      10
## 4 -2.055407 0.5953179  mnarErgm      10
## 5 -1.802903 0.4268378  mnarErgm      10
## 6 -1.898855 0.5820178  mnarErgm      10
##   parameter        SE missModel coefSet
## 1  2.153231 0.4786744  mnarErgm      10
## 2  2.658408 0.5277056  mnarErgm      10
## 3  2.712541 0.5185533  mnarErgm      10
## 4  2.731668 0.4606359  mnarErgm      10
## 5  2.665940 0.5015408  mnarErgm      10
## 6  2.419708 0.5298171  mnarErgm      10
# basically want to recode the coefsets into meaningful-ish values.. so:
entrPlotData = entrPlotData %>% 
  mutate(entrValue = recode(coefSet, `7` = -1, `8` = -0.5, `9` = 0, `10` = 0.5, `11` = 1, `12` = -0.75, `13` = -0.25, `14` = 0.25, `15` = 0.75))

# and plot it
entrPlotData %>% 
  filter(Parameter == "edges") %>% 
  ggplot(aes(x = entrValue, y = Mean)) + 
  geom_line() + 
  geom_hline(yintercept = -0.77, col = "darkblue") + 
  geom_ribbon(aes(ymin = Lower, ymax = Upper), alpha = 0.2) +
  labs( x = "Entrainment parameter value", y = "Mean (95% CI) edges estimate" ) +
  ggtitle("Edges") +
  theme_classic()

entrPlotData %>% 
  filter(Parameter == "altStar") %>% 
  ggplot(aes(x = entrValue, y = Mean)) + 
  geom_line() + 
  geom_hline(yintercept = -1.88, col = "darkblue") + 
  geom_ribbon(aes(ymin = Lower, ymax = Upper), alpha = 0.2)+
  labs( x = "Entrainment parameter value", y = "Mean (95% CI) altStar estimate")+ 
  ggtitle("AltStar") +
  theme_classic()

entrPlotData %>% 
  filter(Parameter == "gwesp") %>% 
  ggplot(aes(x = entrValue, y = Mean)) + 
  geom_line() + 
  geom_hline(yintercept = 3.10, col = "darkblue") + 
  geom_ribbon(aes(ymin = Lower, ymax = Upper), alpha = 0.2)+
  labs( x = "Entrainment parameter value", y = "Mean (95% CI) gwesp estimate")+ 
  ggtitle("Gwesp") +
  theme_classic()